From 85b7bc695629926641c7cb752fd478adfdf374f3 Mon Sep 17 00:00:00 2001 From: Marius Date: Sun, 4 Jul 2010 15:32:09 +0300 Subject: stable 2010-05-24 13:10 --- tex/context/base/char-cmp.lua | 268 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 268 insertions(+) create mode 100644 tex/context/base/char-cmp.lua (limited to 'tex/context/base/char-cmp.lua') diff --git a/tex/context/base/char-cmp.lua b/tex/context/base/char-cmp.lua new file mode 100644 index 000000000..c7deb7901 --- /dev/null +++ b/tex/context/base/char-cmp.lua @@ -0,0 +1,268 @@ +if not modules then modules = { } end modules ['char-cmp'] = { + version = 1.001, + comment = "companion to char-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local utf = unicode.utf8 +local unpack = unpack or table.unpack + +characters = characters or { } +characters.uncomposed = characters.uncomposed or { } + +--[[ldx-- +

The code defined here may move to the big character table.

+--ldx]]-- + +characters.basedigits = { + ['zero'] = 48, ['one'] = 49, + ['two'] = 50, ['three'] = 51, + ['four'] = 52, ['five'] = 53, + ['six'] = 54, ['seven'] = 55, + ['eight'] = 56, ['nine'] = 57 +} + +--[[ldx-- +

The next three tables can for instance be be used to enhance +kerning tables that lack kerning pairs for these special characters. +Of course they may come in handy elsewhere too

+--ldx]]-- + +-- we can use shcodes, but then we also need slcode and srcode +-- +-- AEligature +-- => slcode == ub('A') +-- => srcode == ub('E') +-- or +-- => shcode == { ub('A'), ub('E') } +-- => reduction = "AE" +-- +-- eacute +-- => shcode == ub('A') +-- or +-- => shcode == { ub('a') } +-- => reduction = "a" + +characters.uncomposed.left = { + AEligature = "A", aeligature = "a", + OEligature = "O", oeligature = "o", + IJligature = "I", ijligature = "i", + AE = "A", ae = "a", + OE = "O", oe = "o", + IJ = "I", ij = "i", + Ssharp = "S", ssharp = "s", +} + +characters.uncomposed.right = { + AEligature = "E", aeligature = "e", + OEligature = "E", oeligature = "e", + IJligature = "J", ijligature = "j", + AE = "E", ae = "e", + OE = "E", oe = "e", + IJ = "J", ij = "j", + Ssharp = "S", ssharp = "s", +} + +characters.uncomposed.both = { + Acircumflex = "A", acircumflex = "a", + Ccircumflex = "C", ccircumflex = "c", + Ecircumflex = "E", ecircumflex = "e", + Gcircumflex = "G", gcircumflex = "g", + Hcircumflex = "H", hcircumflex = "h", + Icircumflex = "I", icircumflex = "i", + Jcircumflex = "J", jcircumflex = "j", + Ocircumflex = "O", ocircumflex = "o", + Scircumflex = "S", scircumflex = "s", + Ucircumflex = "U", ucircumflex = "u", + Wcircumflex = "W", wcircumflex = "w", + Ycircumflex = "Y", ycircumflex = "y", + + Agrave = "A", agrave = "a", + Egrave = "E", egrave = "e", + Igrave = "I", igrave = "i", + Ograve = "O", ograve = "o", + Ugrave = "U", ugrave = "u", + Ygrave = "Y", ygrave = "y", + + Atilde = "A", atilde = "a", + Itilde = "I", itilde = "i", + Otilde = "O", otilde = "o", + Utilde = "U", utilde = "u", + Ntilde = "N", ntilde = "n", + + Adiaeresis = "A", adiaeresis = "a", Adieresis = "A", adieresis = "a", + Ediaeresis = "E", ediaeresis = "e", Edieresis = "E", edieresis = "e", + Idiaeresis = "I", idiaeresis = "i", Idieresis = "I", idieresis = "i", + Odiaeresis = "O", odiaeresis = "o", Odieresis = "O", odieresis = "o", + Udiaeresis = "U", udiaeresis = "u", Udieresis = "U", udieresis = "u", + Ydiaeresis = "Y", ydiaeresis = "y", Ydieresis = "Y", ydieresis = "y", + + Aacute = "A", aacute = "a", + Cacute = "C", cacute = "c", + Eacute = "E", eacute = "e", + Iacute = "I", iacute = "i", + Lacute = "L", lacute = "l", + Nacute = "N", nacute = "n", + Oacute = "O", oacute = "o", + Racute = "R", racute = "r", + Sacute = "S", sacute = "s", + Uacute = "U", uacute = "u", + Yacute = "Y", yacute = "y", + Zacute = "Z", zacute = "z", + + Dstroke = "D", dstroke = "d", + Hstroke = "H", hstroke = "h", + Tstroke = "T", tstroke = "t", + + Cdotaccent = "C", cdotaccent = "c", + Edotaccent = "E", edotaccent = "e", + Gdotaccent = "G", gdotaccent = "g", + Idotaccent = "I", idotaccent = "i", + Zdotaccent = "Z", zdotaccent = "z", + + Amacron = "A", amacron = "a", + Emacron = "E", emacron = "e", + Imacron = "I", imacron = "i", + Omacron = "O", omacron = "o", + Umacron = "U", umacron = "u", + + Ccedilla = "C", ccedilla = "c", + Kcedilla = "K", kcedilla = "k", + Lcedilla = "L", lcedilla = "l", + Ncedilla = "N", ncedilla = "n", + Rcedilla = "R", rcedilla = "r", + Scedilla = "S", scedilla = "s", + Tcedilla = "T", tcedilla = "t", + + Ohungarumlaut = "O", ohungarumlaut = "o", + Uhungarumlaut = "U", uhungarumlaut = "u", + + Aogonek = "A", aogonek = "a", + Eogonek = "E", eogonek = "e", + Iogonek = "I", iogonek = "i", + Uogonek = "U", uogonek = "u", + + Aring = "A", aring = "a", + Uring = "U", uring = "u", + + Abreve = "A", abreve = "a", + Ebreve = "E", ebreve = "e", + Gbreve = "G", gbreve = "g", + Ibreve = "I", ibreve = "i", + Obreve = "O", obreve = "o", + Ubreve = "U", ubreve = "u", + + Ccaron = "C", ccaron = "c", + Dcaron = "D", dcaron = "d", + Ecaron = "E", ecaron = "e", + Lcaron = "L", lcaron = "l", + Ncaron = "N", ncaron = "n", + Rcaron = "R", rcaron = "r", + Scaron = "S", scaron = "s", + Tcaron = "T", tcaron = "t", + Zcaron = "Z", zcaron = "z", + + dotlessI = "I", dotlessi = "i", + dotlessJ = "J", dotlessj = "j", + + AEligature = "AE", aeligature = "ae", AE = "AE", ae = "ae", + OEligature = "OE", oeligature = "oe", OE = "OE", oe = "oe", + IJligature = "IJ", ijligature = "ij", IJ = "IJ", ij = "ij", + + Lstroke = "L", lstroke = "l", Lslash = "L", lslash = "l", + Ostroke = "O", ostroke = "o", Oslash = "O", oslash = "o", + + Ssharp = "SS", ssharp = "ss", + + Aumlaut = "A", aumlaut = "a", + Eumlaut = "E", eumlaut = "e", + Iumlaut = "I", iumlaut = "i", + Oumlaut = "O", oumlaut = "o", + Uumlaut = "U", uumlaut = "u", + +} + +--[[ldx-- +

The following function is used in the indexing code, where +we need some sort of default fallback mapping.

+--ldx]]-- + +function characters.uncompose(n) -- n == string|number, returns string + local cdn + if type(n) == "string" then + cdn = characters.data[utf.byte(n)] + else + cdn = characters.data[n] + end + -- return characters.shape(n) + if cdn then + local shcode = cdn.shcode + if not shcode then + return characters.uncomposed.both[cdn.contextname] or n + elseif type(shcode) == "table" then + return utf.char(unpack(cdn.shcode)) + else + return utf.char(cdn.shcode) + end + end + return n +end + +--[[ldx-- +

Only characters with a code smaller than 128 make sense, +anything larger is encoding dependent. An interesting complication +is that a character can be in an encoding twice but is hashed +once.

+--ldx]]-- + +characters.ligatures = { + ['f'] = { + { 'f', 'ff' }, + { 'i', 'fi' }, + { 'l', 'fl' }, + }, + ['ff'] = { + { 'i', 'ffi' } + }, + ['fi'] = { + { 'i', 'fii' } + }, + ['fl'] = { + { 'i', 'fli' } + }, + ['s'] = { + { 't', 'st' } + }, + ['i'] = { + { 'j', 'ij' } + }, +} + +characters.texligatures = { + -- ['space'] = { + -- { 'L', 'Lslash' }, + -- { 'l', 'lslash' } + -- }, + -- ['question'] = { + -- { 'quoteleft', 'questiondown' } + -- }, + -- ['exclam'] = { + -- { 'quoteleft', 'exclamdown' } + -- }, + ['quoteleft'] = { + { 'quoteleft', 'quotedblleft' } + }, + ['quoteright'] = { + { 'quoteright', 'quotedblright' } + }, + ['hyphen'] = { + { 'hyphen', 'endash' } + }, + ['endash'] = { + { 'hyphen', 'emdash' } + } +} + +--~ U+2019: right single quotation mark / quoteright -- cgit v1.2.3