path: root/tex/context/base/mkiv/char-tex.lua
author     Context Git Mirror Bot <phg42.2a@gmail.com>  2016-01-12 17:15:07 +0100
committer  Context Git Mirror Bot <phg42.2a@gmail.com>  2016-01-12 17:15:07 +0100
commit     8d8d528d2ad52599f11250cfc567fea4f37f2a8b (patch)
tree       94286bc131ef7d994f9432febaf03fe23d10eef8 /tex/context/base/mkiv/char-tex.lua
parent     f5aed2e51223c36c84c5f25a6cad238b2af59087 (diff)
download   context-8d8d528d2ad52599f11250cfc567fea4f37f2a8b.tar.gz
2016-01-12 16:26:00
Diffstat (limited to 'tex/context/base/mkiv/char-tex.lua')
-rw-r--r--  tex/context/base/mkiv/char-tex.lua  779
1 file changed, 779 insertions(+), 0 deletions(-)
diff --git a/tex/context/base/mkiv/char-tex.lua b/tex/context/base/mkiv/char-tex.lua
new file mode 100644
index 000000000..df7433409
--- /dev/null
+++ b/tex/context/base/mkiv/char-tex.lua
@@ -0,0 +1,779 @@
+if not modules then modules = { } end modules ['char-tex'] = {
+ version = 1.001,
+ comment = "companion to char-ini.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+
+local lpeg = lpeg
+local next, type = next, type
+local format, find, gmatch = string.format, string.find, string.gmatch
+local utfchar, utfbyte = utf.char, utf.byte
+local concat, tohash = table.concat, table.tohash
+local P, C, R, S, V, Cs, Cc = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs, lpeg.Cc
+
+local lpegpatterns = lpeg.patterns
+local lpegmatch = lpeg.match
+local utfchartabletopattern = lpeg.utfchartabletopattern
+
+local allocate = utilities.storage.allocate
+local mark = utilities.storage.mark
+
+local context = context
+local commands = commands
+
+local characters = characters
+local texcharacters = { }
+characters.tex = texcharacters
+local utffilters = characters.filters.utf
+
+local is_character = characters.is_character
+local is_letter = characters.is_letter
+local is_command = characters.is_command
+local is_spacing = characters.is_spacing
+local is_mark = characters.is_mark
+local is_punctuation = characters.is_punctuation
+
+local data = characters.data if not data then return end
+local blocks = characters.blocks
+
+local trace_defining = false trackers.register("characters.defining", function(v) trace_defining = v end)
+
+local report_defining = logs.reporter("characters")
+
+--[[ldx--
+<p>In order to deal with 8-bit output, we need to find a way to go from <l n='utf'/> to
+8-bit. This is handled in the <l n='luatex'/> engine itself.</p>
+
+<p>This leaves us with problems for characters that are specific to <l n='tex'/>, like
+<type>{}</type>, <type>$</type> and the like. We can remap characters that tex input files
+are sensitive to into a private area (while writing to a utility file) and then revert
+them to their original slot when we read such a file back in. Instead of reverting, we can
+(when we resolve characters to glyphs) map them to the right glyph there. For this purpose
+we can use the private planes 0x0F0000 and 0x100000.</p>
+--ldx]]--
+
+local low = allocate()
+local high = allocate()
+local escapes = allocate()
+local special = "~#$%^&_{}\\|" -- "~#$%{}\\|"
+
+local private = {
+ low = low,
+ high = high,
+ escapes = escapes,
+}
+
+utffilters.private = private
+
+for ch in gmatch(special,".") do
+ local cb
+ if type(ch) == "number" then
+ cb, ch = ch, utfchar(ch)
+ else
+ cb = utfbyte(ch)
+ end
+ if cb < 256 then
+ escapes[ch] = "\\" .. ch
+ low[ch] = utfchar(0x0F0000 + cb)
+ if ch == "%" then
+ ch = "%%" -- nasty, but we need this as in replacements (also in lpeg) % is interpreted
+ end
+ high[utfchar(0x0F0000 + cb)] = ch
+ end
+end
+
+local tohigh = lpeg.replacer(low) -- frozen, only for basic tex
+local tolow = lpeg.replacer(high) -- frozen, only for basic tex
+
+lpegpatterns.utftohigh = tohigh
+lpegpatterns.utftolow = tolow
+
+function utffilters.harden(str)
+ return lpegmatch(tohigh,str)
+end
+
+function utffilters.soften(str)
+ return lpegmatch(tolow,str)
+end
+
+private.escape = utf.remapper(escapes) -- maybe: ,"dynamic"
+private.replace = utf.remapper(low) -- maybe: ,"dynamic"
+private.revert = utf.remapper(high) -- maybe: ,"dynamic"
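+
+-- A quick usage sketch in the spirit of the commented test prints further down; the
+-- exact private slots depend on the table above, so take the values as illustrative:
+--
+-- local hardened = utffilters.harden("x $ y") -- "$" ends up in the private plane (slot 0x0F0024)
+-- local softened = utffilters.soften(hardened)
+-- print(softened == "x $ y")                  -- expected: true (round trip)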
+
+--[[ldx--
+<p>We get a more efficient variant of this when we integrate the replacements in the
+collapser. This more or less renders the previous private code redundant. The following
+lines are equivalent, but the first snippet uses the relocated dollars.</p>
+
+<typing>
+[󰀤x󰀤] [$x$]
+</typing>
+--ldx]]--
+
+-- Using the tree-lpeg-mapper would be nice, but we also need to deal with end-of-string
+-- cases like "\"\i", and we don't want "\relax" to be seen as "\r e lax" (for which we
+-- would need to mess with spaces).
+
+local accentmapping = allocate {
+ ['"'] = { [""] = "¨",
+ A = "Ä", a = "ä",
+ E = "Ë", e = "ë",
+ I = "Ï", i = "ï", ["ı"] = "ï", ["\\i"] = "ï",
+ O = "Ö", o = "ö",
+ U = "Ü", u = "ü",
+ Y = "Ÿ", y = "ÿ",
+ },
+ ["'"] = { [""] = "´",
+ A = "Á", a = "á",
+ C = "Ć", c = "ć",
+ E = "É", e = "é",
+ I = "Í", i = "í", ["ı"] = "í", ["\\i"] = "í",
+ L = "Ĺ", l = "ĺ",
+ N = "Ń", n = "ń",
+ O = "Ó", o = "ó",
+ R = "Ŕ", r = "ŕ",
+ S = "Ś", s = "ś",
+ U = "Ú", u = "ú",
+ Y = "Ý", y = "ý",
+ Z = "Ź", z = "ź",
+ },
+ ["."] = { [""] = "˙",
+ C = "Ċ", c = "ċ",
+ E = "Ė", e = "ė",
+ G = "Ġ", g = "ġ",
+ I = "İ", i = "i", ["ı"] = "i", ["\\i"] = "i",
+ Z = "Ż", z = "ż",
+ },
+ ["="] = { [""] = "¯",
+ A = "Ā", a = "ā",
+ E = "Ē", e = "ē",
+ I = "Ī", i = "ī", ["ı"] = "ī", ["\\i"] = "ī",
+ O = "Ō", o = "ō",
+ U = "Ū", u = "ū",
+ },
+ ["H"] = { [""] = "˝",
+ O = "Ő", o = "ő",
+ U = "Ű", u = "ű",
+ },
+ ["^"] = { [""] = "ˆ",
+ A = "Â", a = "â",
+ C = "Ĉ", c = "ĉ",
+ E = "Ê", e = "ê",
+ G = "Ĝ", g = "ĝ",
+ H = "Ĥ", h = "ĥ",
+ I = "Î", i = "î", ["ı"] = "î", ["\\i"] = "î",
+ J = "Ĵ", j = "ĵ",
+ O = "Ô", o = "ô",
+ S = "Ŝ", s = "ŝ",
+ U = "Û", u = "û",
+ W = "Ŵ", w = "ŵ",
+ Y = "Ŷ", y = "ŷ",
+ },
+ ["`"] = { [""] = "`",
+ A = "À", a = "à",
+ E = "È", e = "è",
+ I = "Ì", i = "ì", ["ı"] = "ì", ["\\i"] = "ì",
+ O = "Ò", o = "ò",
+ U = "Ù", u = "ù",
+ Y = "Ỳ", y = "ỳ",
+ },
+ ["c"] = { [""] = "¸",
+ C = "Ç", c = "ç",
+ K = "Ķ", k = "ķ",
+ L = "Ļ", l = "ļ",
+ N = "Ņ", n = "ņ",
+ R = "Ŗ", r = "ŗ",
+ S = "Ş", s = "ş",
+ T = "Ţ", t = "ţ",
+ },
+ ["k"] = { [""] = "˛",
+ A = "Ą", a = "ą",
+ E = "Ę", e = "ę",
+ I = "Į", i = "į",
+ U = "Ų", u = "ų",
+ },
+ ["r"] = { [""] = "˚",
+ A = "Å", a = "å",
+ U = "Ů", u = "ů",
+ },
+ ["u"] = { [""] = "˘",
+ A = "Ă", a = "ă",
+ E = "Ĕ", e = "ĕ",
+ G = "Ğ", g = "ğ",
+ I = "Ĭ", i = "ĭ", ["ı"] = "ĭ", ["\\i"] = "ĭ",
+ O = "Ŏ", o = "ŏ",
+ U = "Ŭ", u = "ŭ",
+ },
+ ["v"] = { [""] = "ˇ",
+ C = "Č", c = "č",
+ D = "Ď", d = "ď",
+ E = "Ě", e = "ě",
+ L = "Ľ", l = "ľ",
+ N = "Ň", n = "ň",
+ R = "Ř", r = "ř",
+ S = "Š", s = "š",
+ T = "Ť", t = "ť",
+ Z = "Ž", z = "ž",
+ },
+ ["~"] = { [""] = "˜",
+ A = "Ã", a = "ã",
+ I = "Ĩ", i = "ĩ", ["ı"] = "ĩ", ["\\i"] = "ĩ",
+ N = "Ñ", n = "ñ",
+ O = "Õ", o = "õ",
+ U = "Ũ", u = "ũ",
+ },
+}
+
+texcharacters.accentmapping = accentmapping
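+
+-- The table is a two-level lookup: accent command first, then the base character; a
+-- few illustrative (commented out) probes:
+--
+-- print(accentmapping['"']["e"]) -- "ë"
+-- print(accentmapping["v"]["S"]) -- "Š"
+-- print(accentmapping["'"][""])  -- "´" (the standalone accent)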
+
+local accent_map = allocate { -- incomplete
+ ['~'] = "̃" , -- ̃ Ẽ
+ ['"'] = "̈" , -- ̈ Ë
+ ["`"] = "̀" , -- ̀ È
+ ["'"] = "́" , -- ́ É
+ ["^"] = "̂" , -- ̂ Ê
+ -- ̄ Ē
+ -- ̆ Ĕ
+ -- ̇ Ė
+ -- ̉ Ẻ
+ -- ̌ Ě
+ -- ̏ Ȅ
+ -- ̑ Ȇ
+ -- ̣ Ẹ
+ -- ̧ Ȩ
+ -- ̨ Ę
+ -- ̭ Ḙ
+ -- ̰ Ḛ
+}
+
+-- local accents = concat(table.keys(accentmapping)) -- was _map
+
+local function remap_accent(a,c,braced)
+ local m = accentmapping[a]
+ if m then
+ local n = m[c]
+ if n then
+ return n
+ end
+ end
+-- local m = accent_map[a]
+-- if m then
+-- return c .. m
+-- elseif braced then -- or #c > 0
+ if braced then -- or #c > 0
+ return "\\" .. a .. "{" .. c .. "}"
+ else
+ return "\\" .. a .. " " .. c
+ end
+end
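+
+-- Illustrative (commented out) behaviour: known combinations come back as composed
+-- characters, unknown ones are reconstructed as TeX input again:
+--
+-- print(remap_accent('"',"e",true))  -- "ë"
+-- print(remap_accent('"',"q",true))  -- "\"{q}" (no mapping, braced form kept)
+-- print(remap_accent('"',"q",false)) -- "\" q"  (no mapping, unbraced form kept)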
+
+local commandmapping = allocate {
+ ["aa"] = "å", ["AA"] = "Å",
+ ["ae"] = "æ", ["AE"] = "Æ",
+ ["cc"] = "ç", ["CC"] = "Ç",
+ ["i"] = "ı", ["j"] = "ȷ",
+ ["ij"] = "ij", ["IJ"] = "IJ",
+ ["l"] = "ł", ["L"] = "Ł",
+ ["o"] = "ø", ["O"] = "Ø",
+ ["oe"] = "œ", ["OE"] = "Œ",
+ ["sz"] = "ß", ["SZ"] = "SZ", ["ss"] = "ß", ["SS"] = "ß",
+}
+
+texcharacters.commandmapping = commandmapping
+
+local ligaturemapping = allocate {
+ ["''"] = "”",
+ ["``"] = "“",
+ ["--"] = "–",
+ ["---"] = "—",
+}
+
+-- local achar = R("az","AZ") + P("ı") + P("\\i")
+--
+-- local spaces = P(" ")^0
+-- local no_l = P("{") / ""
+-- local no_r = P("}") / ""
+-- local no_b = P('\\') / ""
+--
+-- local lUr = P("{") * C(achar) * P("}")
+--
+-- local accents_1 = [["'.=^`~]]
+-- local accents_2 = [[Hckruv]]
+--
+-- local accent = P('\\') * (
+-- C(S(accents_1)) * (lUr * Cc(true) + C(achar) * Cc(false)) + -- we need achar for ı etc, could be sped up
+-- C(S(accents_2)) * lUr * Cc(true)
+-- ) / remap_accent
+--
+-- local csname = P('\\') * C(R("az","AZ")^1)
+--
+-- local command = (
+-- csname +
+-- P("{") * csname * spaces * P("}")
+-- ) / commandmapping -- remap_commands
+--
+-- local both_1 = Cs { "run",
+-- accent = accent,
+-- command = command,
+-- run = (V("accent") + no_l * V("accent") * no_r + V("command") + P(1))^0,
+-- }
+--
+-- local both_2 = Cs { "run",
+-- accent = accent,
+-- command = command,
+-- run = (V("accent") + V("command") + no_l * ( V("accent") + V("command") ) * no_r + P(1))^0,
+-- }
+--
+-- function texcharacters.toutf(str,strip)
+-- if not find(str,"\\") then
+-- return str
+-- elseif strip then
+-- return lpegmatch(both_1,str)
+-- else
+-- return lpegmatch(both_2,str)
+-- end
+-- end
+
+local untex
+
+local function toutfpattern()
+ if not untex then
+ local hash = { }
+ for k, v in next, accentmapping do
+ for kk, vv in next, v do
+ if (k >= "a" and k <= "z") or (k >= "A" and k <= "Z") then
+ hash[ "\\"..k.." "..kk ] = vv
+ hash["{\\"..k.." "..kk.."}"] = vv
+ else
+ hash["\\" ..k ..kk ] = vv
+ hash["{\\"..k ..kk.."}"] = vv
+ end
+ hash["\\" ..k.."{"..kk.."}" ] = vv
+ hash["{\\"..k.."{"..kk.."}}"] = vv
+ end
+ end
+ for k, v in next, commandmapping do
+ hash["\\"..k.." "] = v
+ hash["{\\"..k.."}"] = v
+ hash["{\\"..k.." }"] = v
+ end
+ for k, v in next, ligaturemapping do
+ hash[k] = v
+ end
+ untex = utfchartabletopattern(hash) / hash
+ end
+ return untex
+end
+
+texcharacters.toutfpattern = toutfpattern
+
+local pattern = nil
+
+local function prepare()
+ pattern = Cs((toutfpattern() + P(1))^0)
+ return pattern
+end
+
+function texcharacters.toutf(str,strip)
+ if str == "" then
+ return str
+ elseif not find(str,"\\") then
+ return str
+ -- elseif strip then
+ else
+ return lpegmatch(pattern or prepare(),str)
+ end
+end
+
+-- print(texcharacters.toutf([[\~{Z}]],true))
+-- print(texcharacters.toutf([[\'\i]],true))
+-- print(texcharacters.toutf([[\'{\i}]],true))
+-- print(texcharacters.toutf([[\"{e}]],true))
+-- print(texcharacters.toutf([[\" {e}]],true))
+-- print(texcharacters.toutf([[{\"{e}}]],true))
+-- print(texcharacters.toutf([[{\" {e}}]],true))
+-- print(texcharacters.toutf([[{\l}]],true))
+-- print(texcharacters.toutf([[{\l }]],true))
+-- print(texcharacters.toutf([[\v{r}]],true))
+-- print(texcharacters.toutf([[fo{\"o}{\ss}ar]],true))
+-- print(texcharacters.toutf([[H{\'a}n Th\^e\llap{\raise 0.5ex\hbox{\'{\relax}}} Th{\'a}nh]],true))
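+--
+-- Two more illustrative cases (note that toutf only applies the pattern when the
+-- string contains a backslash, so ligature-only input passes through unchanged):
+--
+-- print(texcharacters.toutf([[\'e -- \'o]],true))              -- accents and the dash ligature both map
+-- print(texcharacters.toutf([[--- no backslash here ---]],true)) -- returned as-is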
+
+function texcharacters.safechar(n) -- was characters.safechar
+ local c = data[n]
+ if c and c.contextname then
+ return "\\" .. c.contextname
+ else
+ return utfchar(n)
+ end
+end
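+
+-- Hedged illustration (commented out); which slots carry a contextname depends on the
+-- character data tables, so the first result is only indicative:
+--
+-- print(texcharacters.safechar(0x23)) -- "\texthash" (assuming "#" has that contextname)
+-- print(texcharacters.safechar(0x41)) -- "A" (plain letters come back as themselves)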
+
+if not context or not commands then
+ -- used in e.g. mtx-bibtex
+ return
+end
+
+-- all kind of initializations
+
+if not interfaces then return end
+
+local implement = interfaces.implement
+
+local tex = tex
+local texsetlccode = tex.setlccode
+local texsetuccode = tex.setuccode
+local texsetsfcode = tex.setsfcode
+local texsetcatcode = tex.setcatcode
+
+local contextsprint = context.sprint
+local ctxcatcodes = catcodes.numbers.ctxcatcodes
+
+function texcharacters.defineaccents()
+ local ctx_dodefineaccentcommand = context.dodefineaccentcommand
+ local ctx_dodefineaccent = context.dodefineaccent
+ local ctx_dodefinecommand = context.dodefinecommand
+ for accent, group in next, accentmapping do
+ ctx_dodefineaccentcommand(accent)
+ for character, mapping in next, group do
+ ctx_dodefineaccent(accent,character,mapping)
+ end
+ end
+ for command, mapping in next, commandmapping do
+ ctx_dodefinecommand(command,mapping)
+ end
+end
+
+implement { -- a waste of scanner but consistent
+ name = "defineaccents",
+ actions = texcharacters.defineaccents
+}
+
+--[[ldx--
+<p>Instead of using a <l n='tex'/> file to define the named glyphs, we
+use the table. After all, we have this information available anyway.</p>
+--ldx]]--
+
+function commands.makeactive(n,name) -- not used
+ contextsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name))
+ -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
+end
+
+implement {
+ name = "utfchar",
+ actions = { utfchar, contextsprint },
+ arguments = "integer"
+}
+
+implement {
+ name = "safechar",
+ actions = { texcharacters.safechar, contextsprint },
+ arguments = "integer"
+}
+
+implement {
+ name = "uchar",
+ arguments = { "integer", "integer" },
+ actions = function(h,l)
+ context(utfchar(h*256+l))
+ end
+}
+
+tex.uprint = commands.utfchar
+
+-- in context we don't use lc and uc codes (in fact in luatex we should have an hf code)
+-- so at some point we might drop this
+
+local forbidden = tohash { -- at least now
+ 0x00A0,
+ 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D,
+ 0x202F,
+ 0x205F,
+ -- 0xFEFF,
+}
+
+local csletters = characters.csletters -- also a signal that we have initialized
+local activated = { }
+local sfstate = "unset" -- unset, traditional, normal
+local blocks_too = false
+
+directives.register("characters.blockstoo",function(v) blocks_too = v end)
+
+-- If this is something that is not documentwide and used a lot, then we
+-- need a more clever approach (trivial but not now).
+
+local function setuppersfcodes(v,n)
+ if sfstate ~= "unset" then
+ report_defining("setting uppercase sf codes to %a",n)
+ for u, chr in next, data do
+ if chr.category == "lu" then
+ texsetsfcode(u,n)
+ end
+ end
+ end
+ sfstate = v
+end
+
+directives.register("characters.spaceafteruppercase",function(v)
+ if v == "traditional" then
+ setuppersfcodes(v,999)
+ elseif v == "normal" then
+ setuppersfcodes(v,1000)
+ end
+end)
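+
+-- This directive is meant to be toggled from the TeX end; a hedged sketch (the macro
+-- is the generic ConTeXt directive interface, not something defined in this file):
+--
+-- \enabledirectives[characters.spaceafteruppercase=traditional]
+-- \enabledirectives[characters.spaceafteruppercase=normal]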
+
+if not csletters then
+
+ csletters = allocate()
+ characters.csletters = csletters
+
+ report_defining("setting up character related codes and commands")
+
+ if sfstate == "unset" then
+ sfstate = "traditional"
+ end
+
+ local traditional = sfstate == "traditional"
+
+ for u, chr in next, data do -- will move up
+ local fallback = chr.fallback
+ if fallback then
+ contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}")
+ activated[#activated+1] = u
+ else
+ local contextname = chr.contextname
+ local category = chr.category
+ local isletter = is_letter[category]
+ if contextname then
+ if is_character[category] then
+ if chr.unicodeslot < 128 then
+ if isletter then
+ -- setmacro
+ local c = utfchar(u)
+ contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,c)) -- has no s
+ csletters[c] = u
+ else
+ -- setchar
+ contextsprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) -- has no s
+ end
+ else
+ -- setmacro
+ local c = utfchar(u)
+ contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,c)) -- has no s
+ if isletter and u >= 32 and u <= 65536 then
+ csletters[c] = u
+ end
+ end
+ --
+ if isletter then
+ local lc, uc = chr.lccode, chr.uccode
+ if not lc then
+ chr.lccode, lc = u, u
+ elseif type(lc) == "table" then
+ lc = u
+ end
+ if not uc then
+ chr.uccode, uc = u, u
+ elseif type(uc) == "table" then
+ uc = u
+ end
+ texsetlccode(u,lc,uc)
+ if traditional and category == "lu" then
+ texsetsfcode(u,999)
+ end
+ end
+ --
+ elseif is_command[category] and not forbidden[u] then
+ -- set
+ contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}")
+ activated[#activated+1] = u
+ elseif is_mark[category] then
+ texsetlccode(u,u,u) -- for hyphenation
+ end
+ elseif isletter and u >= 32 and u <= 65536 then
+ csletters[utfchar(u)] = u
+ --
+ local lc, uc = chr.lccode, chr.uccode
+ if not lc then
+ chr.lccode, lc = u, u
+ elseif type(lc) == "table" then
+ lc = u
+ end
+ if not uc then
+ chr.uccode, uc = u, u
+ elseif type(uc) == "table" then
+ uc = u
+ end
+ texsetlccode(u,lc,uc)
+ if traditional and category == "lu" then
+ texsetsfcode(u,999)
+ end
+ --
+ elseif is_mark[category] then
+ --
+ texsetlccode(u,u,u) -- for hyphenation
+ --
+ end
+ end
+ end
+
+ if blocks_too then
+ -- this slows down format generation by over 10 percent
+ for k, v in next, blocks do
+ if v.catcode == "letter" then
+ for u=v.first,v.last do
+ csletters[utfchar(u)] = u
+ --
+ -- texsetlccode(u,u,u) -- self self
+ --
+ end
+ end
+ end
+ end
+
+ if storage then
+ storage.register("characters/csletters", csletters, "characters.csletters")
+ end
+
+else
+ mark(csletters)
+
+end
+
+lpegpatterns.csletter = utfchartabletopattern(csletters)
+
+-- todo: get rid of activated
+-- todo: move first loop out, merge with above
+
+function characters.setlettercatcodes(cct)
+ if trace_defining then
+ report_defining("assigning letter catcodes to catcode table %a",cct)
+ end
+ local saved = tex.catcodetable
+ tex.catcodetable = cct
+ texsetcatcode(0x200C,11) -- non-joiner
+ texsetcatcode(0x200D,11) -- joiner
+ for c, u in next, csletters do
+ texsetcatcode(u,11)
+ end
+ -- for u, chr in next, data do
+ -- if not chr.fallback and is_letter[chr.category] and u >= 32 and u <= 65536 then
+ -- texsetcatcode(u,11)
+ -- end
+ -- local range = chr.range
+ -- if range then
+ -- for i=1,range.first,range.last do -- tricky as not all are letters
+ -- texsetcatcode(i,11)
+ -- end
+ -- end
+ -- end
+ -- for k, v in next, blocks do
+ -- if v.catcode == "letter" then
+ -- for u=v.first,v.last do
+ -- texsetcatcode(u,11)
+ -- end
+ -- end
+ -- end
+ tex.catcodetable = saved
+end
+
+function characters.setactivecatcodes(cct)
+ local saved = tex.catcodetable
+ tex.catcodetable = cct
+ for i=1,#activated do
+ local u = activated[i]
+ texsetcatcode(u,13)
+ if trace_defining then
+ report_defining("character %U (%s) is active in set %a",u,data[u].description,cct)
+ end
+ end
+ tex.catcodetable = saved
+end
+
+--[[ldx--
+<p>Setting the lccodes is also done in a loop over the data table.</p>
+--ldx]]--
+
+-- function characters.setcodes() -- we could loop over csletters
+-- if trace_defining then
+-- report_defining("defining lc and uc codes")
+-- end
+-- local traditional = sfstate == "traditional" or sfstate == "unset"
+-- for code, chr in next, data do
+-- local cc = chr.category
+-- if is_letter[cc] then
+-- local range = chr.range
+-- if range then
+-- for i=range.first,range.last do
+-- texsetlccode(i,i,i) -- self self
+-- end
+-- else
+-- local lc, uc = chr.lccode, chr.uccode
+-- if not lc then
+-- chr.lccode, lc = code, code
+-- elseif type(lc) == "table" then
+-- lc = code
+-- end
+-- if not uc then
+-- chr.uccode, uc = code, code
+-- elseif type(uc) == "table" then
+-- uc = code
+-- end
+-- texsetlccode(code,lc,uc)
+-- if traditional and cc == "lu" then
+-- texsetsfcode(code,999)
+-- end
+-- end
+-- elseif is_mark[cc] then
+-- texsetlccode(code,code,code) -- for hyphenation
+-- end
+-- end
+-- if traditional then
+-- sfstate = "traditional"
+-- end
+-- end
+
+-- tex
+
+implement {
+ name = "chardescription",
+ arguments = "integer",
+ actions = function(slot)
+ local d = data[slot]
+ if d then
+ context(d.description)
+ end
+ end,
+}
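+
+-- At the TeX end this becomes callable through the usual \clf_ prefix that
+-- interfaces.implement adds (hedged: the exact description string comes from the
+-- character data tables):
+--
+-- \clf_chardescription 945 % typesets the Unicode description of U+03B1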
+
+-- xml
+
+characters.activeoffset = 0x10000 -- these will be remapped in that byte range
+
+function commands.remapentity(chr,slot) -- not used
+ contextsprint(format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr))
+end
+
+-- xml.entities = xml.entities or { }
+--
+-- storage.register("xml/entities",xml.entities,"xml.entities") -- this will move to lxml
+--
+-- function characters.setmkiventities()
+-- local entities = xml.entities
+-- entities.lt = "<"
+-- entities.amp = "&"
+-- entities.gt = ">"
+-- end
+--
+-- function characters.setmkiientities()
+-- local entities = xml.entities
+-- entities.lt = utfchar(characters.activeoffset + utfbyte("<"))
+-- entities.amp = utfchar(characters.activeoffset + utfbyte("&"))
+-- entities.gt = utfchar(characters.activeoffset + utfbyte(">"))
+-- end
+
+implement { name = "setlettercatcodes", scope = "private", actions = characters.setlettercatcodes, arguments = "integer" }
+implement { name = "setactivecatcodes", scope = "private", actions = characters.setactivecatcodes, arguments = "integer" }
+--------- { name = "setcharactercodes", scope = "private", actions = characters.setcodes }