From 85b7bc695629926641c7cb752fd478adfdf374f3 Mon Sep 17 00:00:00 2001
From: Marius <mariausol@gmail.com>
Date: Sun, 4 Jul 2010 15:32:09 +0300
Subject: stable 2010-05-24 13:10

---
 tex/context/base/char-cmp.lua | 268 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 268 insertions(+)
 create mode 100644 tex/context/base/char-cmp.lua

(limited to 'tex/context/base/char-cmp.lua')
diff --git a/tex/context/base/char-cmp.lua b/tex/context/base/char-cmp.lua
new file mode 100644
index 000000000..c7deb7901
--- /dev/null
+++ b/tex/context/base/char-cmp.lua
@@ -0,0 +1,268 @@
+if not modules then modules = { } end modules ['char-cmp'] = {
+    version   = 1.001,
+    comment   = "companion to char-ini.mkiv",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files"
+}
+
+local utf = unicode.utf8
+local unpack = unpack or table.unpack
+
+characters            = characters            or { }
+characters.uncomposed = characters.uncomposed or { }
+
+--[[ldx--
+<p>The code defined here may move to the big character table.</p>
+--ldx]]--
+
+characters.basedigits = { -- digit glyph name => character code (48 upto 57)
+    zero  = 48, one   = 49,
+    two   = 50, three = 51,
+    four  = 52, five  = 53,
+    six   = 54, seven = 55,
+    eight = 56, nine  = 57,
+}
+
+--[[ldx--
+<p>The next three tables can for instance be used to enhance
+kerning tables that lack kerning pairs for these special characters.
+Of course they may come in handy elsewhere too.</p>
+--ldx]]--
+
+-- we can use shcodes, but then we also need slcode and srcode
+--
+-- AEligature
+--    => slcode == ub('A')
+--    => srcode == ub('E')
+-- or
+--    => shcode == { ub('A'), ub('E') }
+--    => reduction = "AE"
+--
+-- eacute
+--    => shcode == ub('e')
+-- or
+--    => shcode == { ub('e') }
+--    => reduction = "e"
+
+characters.uncomposed.left = { -- glyph name => leading letter of its reduction
+    AEligature = "A",  AE = "A",
+    aeligature = "a",  ae = "a",
+    OEligature = "O",  OE = "O",
+    oeligature = "o",  oe = "o",
+    IJligature = "I",  IJ = "I",
+    ijligature = "i",  ij = "i",
+    Ssharp     = "S",  ssharp = "s",
+}
+
+characters.uncomposed.right = { -- glyph name => final letter of its reduction
+    AEligature = "E",  AE = "E",
+    aeligature = "e",  ae = "e",
+    OEligature = "E",  OE = "E",
+    oeligature = "e",  oe = "e",
+    IJligature = "J",  IJ = "J",
+    ijligature = "j",  ij = "j",
+    Ssharp     = "S",  ssharp = "s",
+}
+
+characters.uncomposed.both = { -- contextname => plain replacement, used by characters.uncompose
+    Acircumflex = "A",  acircumflex = "a",
+    Ccircumflex = "C",  ccircumflex = "c",
+    Ecircumflex = "E",  ecircumflex = "e",
+    Gcircumflex = "G",  gcircumflex = "g",
+    Hcircumflex = "H",  hcircumflex = "h",
+    Icircumflex = "I",  icircumflex = "i",
+    Jcircumflex = "J",  jcircumflex = "j",
+    Ocircumflex = "O",  ocircumflex = "o",
+    Scircumflex = "S",  scircumflex = "s",
+    Ucircumflex = "U",  ucircumflex = "u",
+    Wcircumflex = "W",  wcircumflex = "w",
+    Ycircumflex = "Y",  ycircumflex = "y",
+
+    Agrave = "A",  agrave = "a",
+    Egrave = "E",  egrave = "e",
+    Igrave = "I",  igrave = "i",
+    Ograve = "O",  ograve = "o",
+    Ugrave = "U",  ugrave = "u",
+    Ygrave = "Y",  ygrave = "y",
+
+    Atilde = "A",  atilde = "a",
+    Itilde = "I",  itilde = "i",
+    Otilde = "O",  otilde = "o",
+    Utilde = "U",  utilde = "u",
+    Ntilde = "N",  ntilde = "n",
+    -- both spellings, diaeresis and dieresis, are listed for each letter
+    Adiaeresis = "A",  adiaeresis = "a",  Adieresis = "A",  adieresis = "a",
+    Ediaeresis = "E",  ediaeresis = "e",  Edieresis = "E",  edieresis = "e",
+    Idiaeresis = "I",  idiaeresis = "i",  Idieresis = "I",  idieresis = "i",
+    Odiaeresis = "O",  odiaeresis = "o",  Odieresis = "O",  odieresis = "o",
+    Udiaeresis = "U",  udiaeresis = "u",  Udieresis = "U",  udieresis = "u",
+    Ydiaeresis = "Y",  ydiaeresis = "y",  Ydieresis = "Y",  ydieresis = "y",
+
+    Aacute = "A",  aacute = "a",
+    Cacute = "C",  cacute = "c",
+    Eacute = "E",  eacute = "e",
+    Iacute = "I",  iacute = "i",
+    Lacute = "L",  lacute = "l",
+    Nacute = "N",  nacute = "n",
+    Oacute = "O",  oacute = "o",
+    Racute = "R",  racute = "r",
+    Sacute = "S",  sacute = "s",
+    Uacute = "U",  uacute = "u",
+    Yacute = "Y",  yacute = "y",
+    Zacute = "Z",  zacute = "z",
+
+    Dstroke = "D",  dstroke = "d",
+    Hstroke = "H",  hstroke = "h",
+    Tstroke = "T",  tstroke = "t",
+
+    Cdotaccent = "C",  cdotaccent = "c",
+    Edotaccent = "E",  edotaccent = "e",
+    Gdotaccent = "G",  gdotaccent = "g",
+    Idotaccent = "I",  idotaccent = "i",
+    Zdotaccent = "Z",  zdotaccent = "z",
+
+    Amacron = "A",  amacron = "a",
+    Emacron = "E",  emacron = "e",
+    Imacron = "I",  imacron = "i",
+    Omacron = "O",  omacron = "o",
+    Umacron = "U",  umacron = "u",
+
+    Ccedilla = "C",  ccedilla = "c",
+    Kcedilla = "K",  kcedilla = "k",
+    Lcedilla = "L",  lcedilla = "l",
+    Ncedilla = "N",  ncedilla = "n",
+    Rcedilla = "R",  rcedilla = "r",
+    Scedilla = "S",  scedilla = "s",
+    Tcedilla = "T",  tcedilla = "t",
+
+    Ohungarumlaut = "O",  ohungarumlaut = "o",
+    Uhungarumlaut = "U",  uhungarumlaut = "u",
+
+    Aogonek = "A",  aogonek = "a",
+    Eogonek = "E",  eogonek = "e",
+    Iogonek = "I",  iogonek = "i",
+    Uogonek = "U",  uogonek = "u",
+
+    Aring = "A",  aring = "a",
+    Uring = "U",  uring = "u",
+
+    Abreve = "A",  abreve = "a",
+    Ebreve = "E",  ebreve = "e",
+    Gbreve = "G",  gbreve = "g",
+    Ibreve = "I",  ibreve = "i",
+    Obreve = "O",  obreve = "o",
+    Ubreve = "U",  ubreve = "u",
+
+    Ccaron = "C",  ccaron = "c",
+    Dcaron = "D",  dcaron = "d",
+    Ecaron = "E",  ecaron = "e",
+    Lcaron = "L",  lcaron = "l",
+    Ncaron = "N",  ncaron = "n",
+    Rcaron = "R",  rcaron = "r",
+    Scaron = "S",  scaron = "s",
+    Tcaron = "T",  tcaron = "t",
+    Zcaron = "Z",  zcaron = "z",
+
+    dotlessI = "I",  dotlessi = "i",
+    dotlessJ = "J",  dotlessj = "j",
+    -- ligatures map onto two letters here; uncomposed.left/right hold the halves
+    AEligature = "AE",  aeligature = "ae",  AE         = "AE",  ae         = "ae",
+    OEligature = "OE",  oeligature = "oe",  OE         = "OE",  oe         = "oe",
+    IJligature = "IJ",  ijligature = "ij",  IJ         = "IJ",  ij         = "ij",
+    -- the stroke and the slash names are both mapped onto the bare letter
+    Lstroke    = "L",   lstroke    = "l",   Lslash     = "L",   lslash     = "l",
+    Ostroke    = "O",   ostroke    = "o",   Oslash     = "O",   oslash     = "o",
+
+    Ssharp     = "SS",  ssharp     = "ss",
+    -- umlaut names get the same values as the diaeresis/dieresis names above
+    Aumlaut = "A",  aumlaut = "a",
+    Eumlaut = "E",  eumlaut = "e",
+    Iumlaut = "I",  iumlaut = "i",
+    Oumlaut = "O",  oumlaut = "o",
+    Uumlaut = "U",  uumlaut = "u",
+
+}
+
+--[[ldx--
+<p>The following function is used in the indexing code, where
+we need some sort of default fallback mapping.</p>
+--ldx]]--
+
+function characters.uncompose(n) -- n == string|number, returns string
+    -- Reduces a character (utf string or code point) to its base shape: the
+    -- shcode from characters.data wins, then the contextname based fallback.
+    local code = n
+    if type(n) == "string" then
+        code = utf.byte(n)
+    end
+    local cdn = characters.data[code]
+    local shcode = cdn and cdn.shcode
+    if type(shcode) == "table" then
+        return utf.char(unpack(shcode))
+    elseif shcode then
+        return utf.char(shcode)
+    end
+    local plain = cdn and characters.uncomposed.both[cdn.contextname]
+    if plain then return plain end
+    -- nothing known: hand back the input, but as a string as promised above
+    if type(n) == "number" then return utf.char(n) end
+    return n
+end
+
+--[[ldx--
+<p>Only characters with a code smaller than 128 make sense,
+anything larger is encoding dependent. An interesting complication
+is that a character can be in an encoding twice but is hashed
+once.</p>
+--ldx]]--
+
+characters.ligatures = { -- first glyph => list of { next glyph, ligature }
+    f = {
+        { "f", "ff" },
+        { "i", "fi" },
+        { "l", "fl" },
+    },
+    ff = {
+        { "i", "ffi" },
+    },
+    fi = {
+        { "i", "fii" },
+    },
+    fl = {
+        { "i", "fli" },
+    },
+    s = {
+        { "t", "st" },
+    },
+    i = {
+        { "j", "ij" },
+    },
+}
+
+characters.texligatures = { -- glyph name => list of { next glyph, ligature }
+ -- ['space'] = {
+ --     { 'L', 'Lslash' },
+ --     { 'l', 'lslash' }
+ -- },
+ -- ['question'] = {
+ --     { 'quoteleft', 'questiondown' }
+ -- },
+ -- ['exclam'] = {
+ --     { 'quoteleft', 'exclamdown' }
+ -- },
+    quoteleft = {
+        { "quoteleft", "quotedblleft" },
+    },
+    quoteright = {
+        { "quoteright", "quotedblright" },
+    },
+    hyphen = {
+        { "hyphen", "endash" },
+    },
+    endash = {
+        { "hyphen", "emdash" },
+    },
+}
+
+--~ U+2019: right single quotation mark / quoteright
-- 
cgit v1.2.3