summaryrefslogtreecommitdiff
path: root/tex/context/base/lang-def.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/lang-def.lua')
-rw-r--r--tex/context/base/lang-def.lua418
1 files changed, 418 insertions, 0 deletions
diff --git a/tex/context/base/lang-def.lua b/tex/context/base/lang-def.lua
new file mode 100644
index 000000000..4fdcdf8a7
--- /dev/null
+++ b/tex/context/base/lang-def.lua
@@ -0,0 +1,418 @@
+if not modules then modules = { } end modules ['lang-ini'] = {
+ version = 1.001,
+ comment = "companion to lang-ini.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local lower = string.lower
+
+languages = languages or { }
+local languages = languages
+
+languages.data = languages.data or utilities.storage.allocate { }
+local data = languages.data
+
+-- The specifications are based on an analysis done by Arthur. The
+-- names of tags were changed by Hans. The data is not yet used but
+-- will be some day.
+--
+-- description
+--
+-- The description is only meant as an indication; for example 'no' is
+-- "Norwegian, undetermined" because that's really what it is.
+--
+-- script
+--
+-- This is the 4-letter script tag according to ISO 15924, the
+-- official standard.
+--
+-- bibliographical and terminological
+--
+-- Then we have *two* ISO-639 3-letter tags: one is supposed to be used
+-- for "bibliographical" purposes, the other for "terminological". The
+-- first one is quite special (and mostly used in American libraries),
+-- and the more interesting one is the other (apparently it's that one
+-- we find everywhere).
+--
+-- context
+--
+-- These are the ones used in ConteXt. Kind of numberplate ones.
+--
+-- opentype
+--
+-- This is the 3-letter OpenType language tag, obviously.
+--
+-- variant
+--
+-- This is actually the rfc4646: an extension of ISO-639 that also defines
+-- codes for variants like de-1901 for "German, 1901 orthography" or zh-Hans for
+-- "Chinese, simplified characters" ('Hans' is the ISO-15924 tag for
+-- "HAN ideographs, Simplified" :-) As I said yesterday, I think this
+-- should be the reference since it's exactly what we want: it's really
+-- standard (it's a RFC) and it's more than simply languages. To my
+-- knowledge this is the only system that addresses this issue.
+--
+-- Warning: it's not unique! Because we have two "German" languages
+-- (and could, potentially, have two Chinese, etc.)
+--
+-- Beware: the abbreviations are lowercased, which makes it more
+-- convenient to use them.
+--
+-- todo: add default features
+
+local specifications = {
+ {
+ ["description"] = "Basque",
+ ["script"] = "latn",
+ ["bibliographical"] = "baq",
+ ["terminological"] = "eus",
+ ["context"] = "ba",
+ ["opentype"] = "euq",
+ ["variant"] = "eu",
+ },
+ {
+ ["description"] = "Welsh",
+ ["script"] = "latn",
+ ["bibliographical"] = "wel",
+ ["terminological"] = "cym",
+ ["context"] = "cy",
+ ["opentype"] = "wel",
+ ["variant"] = "cy",
+ },
+ {
+ ["description"] = "Icelandic",
+ ["script"] = "latn",
+ ["bibliographical"] = "ice",
+ ["terminological"] = "isl",
+ ["context"] = "is",
+ ["opentype"] = "isl",
+ ["variant"] = "is",
+ },
+ {
+ ["description"] = "Norwegian, undetermined",
+ ["script"] = "latn",
+ ["bibliographical"] = "nor",
+ ["terminological"] = "nor",
+ ["context"] = "no",
+ ["variant"] = "no",
+ },
+ {
+ ["description"] = "Norwegian bokmal",
+ ["script"] = "latn",
+ ["bibliographical"] = "nob",
+ ["terminological"] = "nob",
+ ["opentype"] = "nor", -- not sure!
+ ["variant"] = "nb",
+ },
+ {
+ ["description"] = "Norwegian nynorsk",
+ ["script"] = "latn",
+ ["bibliographical"] = "nno",
+ ["terminological"] = "nno",
+ ["opentype"] = "nny",
+ ["variant"] = "nn",
+ },
+ {
+ ["description"] = "Ancient Greek",
+ ["script"] = "grek",
+ ["bibliographical"] = "grc",
+ ["terminological"] = "grc",
+ ["context"] = "agr",
+ ["variant"] = "grc",
+ },
+ {
+ ["description"] = "German, 1901 orthography",
+ ["script"] = "latn",
+ ["terminological"] = "deu",
+ ["context"] = "deo",
+ ["opentype"] = "deu",
+ ["variant"] = "de-1901",
+ },
+ {
+ ["description"] = "German, 1996 orthography",
+ ["script"] = "latn",
+ ["bibliographical"] = "ger",
+ ["terminological"] = "deu",
+ ["context"] = "de",
+ ["opentype"] = "deu",
+ ["variant"] = "de-1996",
+ },
+ {
+ ["description"] = "Afrikaans",
+ ["script"] = "latn",
+ ["bibliographical"] = "afr",
+ ["terminological"] = "afr",
+ ["context"] = "af",
+ ["opentype"] = "afk",
+ ["variant"] = "af",
+ },
+ {
+ ["description"] = "Catalan",
+ ["script"] = "latn",
+ ["bibliographical"] = "cat",
+ ["terminological"] = "cat",
+ ["context"] = "ca",
+ ["opentype"] = "cat",
+ ["variant"] = "ca",
+ },
+ {
+ ["description"] = "Czech",
+ ["script"] = "latn",
+ ["bibliographical"] = "cze",
+ ["terminological"] = "ces",
+ ["context"] = "cz",
+ ["opentype"] = "csy",
+ ["variant"] = "cs",
+ },
+ {
+ ["description"] = "Greek",
+ ["script"] = "grek",
+ ["bibliographical"] = "gre",
+ ["terminological"] = "ell",
+ ["context"] = "gr",
+ ["opentype"] = "ell",
+ ["variant"] = "el",
+ },
+ {
+ ["description"] = "American English",
+ ["script"] = "latn",
+ ["bibliographical"] = "eng",
+ ["terminological"] = "eng",
+ ["context"] = "us",
+ ["opentype"] = "eng",
+ ["variant"] = "en-US",
+ },
+ {
+ ["description"] = "British English",
+ ["script"] = "latn",
+ ["bibliographical"] = "eng",
+ ["terminological"] = "eng",
+ ["context"] = "uk",
+ ["opentype"] = "eng",
+ ["variant"] = "en-UK", -- Could be en-GB as well ...
+ },
+ {
+ ["description"] = "Spanish",
+ ["script"] = "latn",
+ ["bibliographical"] = "spa",
+ ["terminological"] = "spa",
+ ["context"] = "es",
+ ["opentype"] = "esp",
+ ["variant"] = "es",
+ },
+ {
+ ["description"] = "Finnish",
+ ["script"] = "latn",
+ ["bibliographical"] = "fin",
+ ["terminological"] = "fin",
+ ["context"] = "fi",
+ ["opentype"] = "fin",
+ ["variant"] = "fi",
+ },
+ {
+ ["description"] = "French",
+ ["script"] = "latn",
+ ["bibliographical"] = "fre",
+ ["terminological"] = "fra",
+ ["context"] = "fr",
+ ["opentype"] = "fra",
+ ["variant"] = "fr",
+ },
+ {
+ ["description"] = "Croatian",
+ ["script"] = "latn",
+ ["bibliographical"] = "scr",
+ ["terminological"] = "hrv",
+ ["context"] = "hr",
+ ["opentype"] = "hrv",
+ ["variant"] = "hr",
+ },
+ {
+ ["description"] = "Hungarian",
+ ["script"] = "latn",
+ ["bibliographical"] = "hun",
+ ["terminological"] = "hun",
+ ["context"] = "hu",
+ ["opentype"] = "hun",
+ ["variant"] = "hu",
+ },
+ {
+ ["description"] = "Italian",
+ ["script"] = "latn",
+ ["bibliographical"] = "ita",
+ ["terminological"] = "ita",
+ ["context"] = "it",
+ ["opentype"] = "ita",
+ ["variant"] = "it",
+ },
+ {
+ ["description"] = "Japanese",
+ ["script"] = "jpan",
+ ["bibliographical"] = "jpn",
+ ["terminological"] = "jpn",
+ ["context"] = "ja",
+ ["opentype"] = "jan",
+ ["variant"] = "ja",
+ },
+ {
+ ["description"] = "Latin",
+ ["script"] = "latn",
+ ["bibliographical"] = "lat",
+ ["terminological"] = "lat",
+ ["context"] = "la",
+ ["opentype"] = "lat",
+ ["variant"] = "la",
+ },
+ {
+ ["description"] = "Portuguese",
+ ["script"] = "latn",
+ ["bibliographical"] = "por",
+ ["terminological"] = "por",
+ ["context"] = "pt",
+ ["opentype"] = "ptg",
+ ["variant"] = "pt",
+ },
+ {
+ ["description"] = "Polish",
+ ["script"] = "latn",
+ ["bibliographical"] = "pol",
+ ["terminological"] = "pol",
+ ["context"] = "pl",
+ ["opentype"] = "plk",
+ ["variant"] = "pl",
+ },
+ {
+ ["description"] = "Romanian",
+ ["script"] = "latn",
+ ["bibliographical"] = "rum",
+ ["terminological"] = "ron",
+ ["context"] = "ro",
+ ["opentype"] = "rom",
+ ["variant"] = "ro",
+ },
+ {
+ ["description"] = "Russian",
+ ["script"] = "cyrl",
+ ["bibliographical"] = "rus",
+ ["terminological"] = "rus",
+ ["context"] = "ru",
+ ["opentype"] = "rus",
+ ["variant"] = "ru",
+ },
+ {
+ ["description"] = "Slovak",
+ ["script"] = "latn",
+ ["bibliographical"] = "slo",
+ ["terminological"] = "slk",
+ ["context"] = "sk",
+ ["opentype"] = "sky",
+ ["variant"] = "sk",
+ },
+ {
+ ["description"] = "Slovenian",
+ ["script"] = "latn",
+ ["bibliographical"] = "slv",
+ ["terminological"] = "slv",
+ ["context"] = "sl",
+ ["opentype"] = "slv",
+ ["variant"] = "sl",
+ },
+ {
+ ["description"] = "Swedish",
+ ["script"] = "latn",
+ ["bibliographical"] = "swe",
+ ["terminological"] = "swe",
+ ["context"] = "sv",
+ ["opentype"] = "sve",
+ ["variant"] = "sv",
+ },
+ {
+ ["description"] = "Turkish",
+ ["script"] = "latn",
+ ["bibliographical"] = "tur",
+ ["terminological"] = "tur",
+ ["context"] = "tr",
+ ["opentype"] = "trk",
+ ["variant"] = "tr",
+ },
+ {
+ ["description"] = "Vietnamese",
+ ["script"] = "latn",
+ ["bibliographical"] = "vie",
+ ["terminological"] = "vie",
+ ["context"] = "vn",
+ ["opentype"] = "vit",
+ ["variant"] = "vi",
+ },
+ {
+ ["description"] = "Chinese, simplified",
+ ["script"] = "hans",
+ ["opentype-script"] = "hani",
+ ["bibliographical"] = "chi",
+ ["terminological"] = "zho",
+ ["context"] = "cn",
+ ["opentype"] = "zhs",
+ ["variant"] = "zh-hans",
+ },
+}
+
+data.specifications = specifications
+
+storage.mark(specifications)
+
+local variants = { } data.variants = variants
+local opentypes = { } data.opentypes = opentypes
+local contexts = { } data.contexts = contexts
+local records = { } data.records = records
+
+
+for k=1,#specifications do
+ local v = languagedata[k]
+ if v.variant then
+ variants[v.variant] = v
+ end
+ if v.opentype then
+ opentypes[v.opentype] = v
+ end
+ local vc = v.context
+ if vc then
+ if type(vc) == "table" then
+ for k=1,#vc do
+ contexts[v] = vc[k]
+ end
+ else
+ contexts[vc] = v
+ end
+ end
+end
+
+setmetatable(variants, { __index = function(t,k)
+ str = lower(str)
+ local v = (l_variant[str] or l_opentype[str] or l_context[str] or l_variant.en).language
+ t[k] = v
+ return v
+end } )
+
+setmetatable(opentypes, { __index = function(t,k)
+ str = lower(str)
+ local v = (l_variant[str] or l_opentype[str] or l_context[str] or l_variant.en).opentype
+ t[k] = v
+ return v
+end
+
+setmetatable(contexts, { __index = function(t,k)
+ str = lower(str)
+ local v = (l_variant[str] or l_opentype[str] or l_context[str] or l_variant[languages.default]).context
+ v = (type(v) == "table" and v[1]) or v
+ t[k] = v
+ return v
+end
+
+setmetatable(records, { __index = function(t,k) -- how useful is this one?
+ str = lower(str)
+ local v = variants[str] or opentypes[str] or contexts[str] or variants.en
+ t[k] = v
+ return v
+end