2014-07-06 21:19:00

author: Context Git Mirror Bot <phg42.2a@gmail.com> 2014-07-06 22:15:04 +0200
committer: Context Git Mirror Bot <phg42.2a@gmail.com> 2014-07-06 22:15:04 +0200
commit: 9081241531f9d2adb062dd168a184c6c272456d9 (patch)
tree: f08bdb10820f24627450387dfe71ebd73881d714 /tex
parent: 5ab3de5b82ca897d811c6f649895cee1dd7e7e56 (diff)
download: context-9081241531f9d2adb062dd168a184c6c272456d9.tar.gz
23 files changed, 1004 insertions, 762 deletions
diff --git a/tex/context/base/char-def.lua b/tex/context/base/char-def.lua
index 0e1d8778e..f30e82898 100644
--- a/tex/context/base/char-def.lua
+++ b/tex/context/base/char-def.lua
@@ -2389,6 +2389,7 @@ characters.data={
   direction="l",
   linebreak="al",
   uccode={ 0x53, 0x53 },
+  shcode={ 0x73, 0x73 },
   unicodeslot=0xDF,
  },
  {
@@ -214783,4 +214784,4 @@ characters.data={
   linebreak="cm",
   unicodeslot=0xE01EF,
  },
-}
-\ No newline at end of file
+}
diff --git a/tex/context/base/char-enc.lua b/tex/context/base/char-enc.lua
index 048837eec..c2061891a 100644
--- a/tex/context/base/char-enc.lua
+++ b/tex/context/base/char-enc.lua
@@ -9,6 +9,8 @@ if not modules then modules = { } end modules ['char-enc'] = {
 
 -- Thanks to tex4ht for these mappings.
 
+local next = next
+
 local allocate, setinitializer = utilities.storage.allocate, utilities.storage.setinitializer
 
 characters       = characters or { }
@@ -169,7 +171,10 @@ characters.synonyms = allocate { -- afm mess
 -- that table.print would not work on this file unless it is accessed once. This
 -- why the serializer does a dummy access.
 
-local enccodes = allocate()  characters.enccodes = enccodes
+local enccodes      = allocate()
+characters.enccodes = enccodes
+
+ -- maybe omit context name -> then same as encodings.make_unicode_vector
 
 local function initialize()
     for unicode, data in next, characters.data do
@@ -179,7 +184,9 @@ local function initialize()
         end
     end
     for name, unicode in next, characters.synonyms do
-        if not enccodes[name] then enccodes[name] = unicode end
+        if not enccodes[name] then
+            enccodes[name] = unicode
+        end
     end
 end
 
diff --git a/tex/context/base/char-fio.lua b/tex/context/base/char-fio.lua
new file mode 100644
index 000000000..766ea7123
--- /dev/null
+++ b/tex/context/base/char-fio.lua
@@ -0,0 +1,56 @@
+if not modules then modules = { } end modules ['char-fio'] = {
+    version   = 1.001,
+    comment   = "companion to char-ini.mkiv",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files"
+}
+
+-- --
+
+local sequencers      = utilities.sequencers
+local appendaction    = sequencers.appendaction
+local enableaction    = sequencers.enableaction
+local disableaction   = sequencers.disableaction
+
+local utffilters      = characters.filters.utf
+
+local textfileactions = resolvers.openers.helpers.textfileactions
+local textlineactions = resolvers.openers.helpers.textlineactions
+
+appendaction (textfileactions,"system","characters.filters.utf.reorder")
+disableaction(textfileactions,         "characters.filters.utf.reorder")
+
+appendaction (textlineactions,"system","characters.filters.utf.reorder")
+disableaction(textlineactions,         "characters.filters.utf.reorder")
+
+appendaction (textfileactions,"system","characters.filters.utf.collapse")
+disableaction(textfileactions,         "characters.filters.utf.collapse")
+
+appendaction (textfileactions,"system","characters.filters.utf.decompose")
+disableaction(textfileactions,         "characters.filters.utf.decompose")
+
+function characters.filters.utf.enable()
+    enableaction(textfileactions,"characters.filters.utf.reorder")
+    enableaction(textfileactions,"characters.filters.utf.collapse")
+    enableaction(textfileactions,"characters.filters.utf.decompose")
+end
+
+local function configure(what,v) -- what: action name, v: false/nil, "line" or anything else
+    if v == "line" then -- only the line based variant is active
+        disableaction(textfileactions,what)
+        enableaction (textlineactions,what)
+    elseif v then -- true or text: only the file based variant is active
+        enableaction (textfileactions,what)
+        disableaction(textlineactions,what)
+    else -- false or nil: the action is switched off in both lists
+        disableaction(textfileactions,what)
+        disableaction(textlineactions,what)
+    end
+end
+
+directives.register("filters.utf.reorder",   function(v) configure("characters.filters.utf.reorder",  v) end)
+directives.register("filters.utf.collapse",  function(v) configure("characters.filters.utf.collapse", v) end)
+directives.register("filters.utf.decompose", function(v) configure("characters.filters.utf.decompose",v) end)
+
+utffilters.setskippable { "mkiv", "mkvi", "mkix", "mkxi" }
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua
index eb73cc19e..a2505c0eb 100644
--- a/tex/context/base/char-ini.lua
+++ b/tex/context/base/char-ini.lua
@@ -7,26 +7,33 @@ if not modules then modules = { } end modules ['char-ini'] = {
 }
 
 -- todo: make two files, one for format generation, one for format use
+-- todo: move some to char-utf
 
 -- we can remove the tag range starting at 0xE0000 (special applications)
 
 local utfchar, utfbyte, utfvalues, ustring, utotable = utf.char, utf.byte, utf.values, utf.ustring, utf.totable
 local concat, unpack, tohash = table.concat, table.unpack, table.tohash
 local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset
-local format, lower, gsub, match, gmatch = string.format, string.lower, string.gsub, string.match, string.match, string.gmatch
-local P, R, Cs, lpegmatch, patterns = lpeg.P, lpeg.R, lpeg.Cs, lpeg.match, lpeg.patterns
+local format, lower, gsub = string.format, string.lower, string.gsub
+local P, R, S, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cs
 
-local utf8byte          = patterns.utf8byte
-local utf8char          = patterns.utf8char
+if not characters then require("char-def") end
 
-local allocate          = utilities.storage.allocate
-local mark              = utilities.storage.mark
+local lpegpatterns          = lpeg.patterns
+local lpegmatch             = lpeg.match
+local utf8byte              = lpegpatterns.utf8byte
+local utf8char              = lpegpatterns.utf8char
 
-local setmetatableindex = table.setmetatableindex
+local utfchartabletopattern = lpeg.utfchartabletopattern
 
-local trace_defining    = false  trackers.register("characters.defining", function(v) characters_defining = v end)
+local allocate              = utilities.storage.allocate
+local mark                  = utilities.storage.mark
 
-local report_defining   = logs.reporter("characters")
+local setmetatableindex     = table.setmetatableindex
+
+local trace_defining        = false  trackers.register("characters.defining", function(v) trace_defining = v end) -- toggle the local flag (was: a stray global characters_defining)
+
+local report_defining       = logs.reporter("characters")
 
 --[[ldx--
 <p>This module implements some methods and creates additional datastructured
@@ -60,7 +67,7 @@ end
 
 local pattern = (P("0x") + P("U+")) * ((R("09","AF")^1 * P(-1)) / function(s) return tonumber(s,16) end)
 
-patterns.chartonumber = pattern
+lpegpatterns.chartonumber = pattern
 
 local function chartonumber(k)
     if type(k) == "string" then
@@ -420,13 +427,15 @@ setmetatableindex(otfscripts,function(t,unicode)
     return "dflt"
 end)
 
+local splitter = lpeg.splitat(S(":-"))
+
 function characters.getrange(name) -- used in font fallback definitions (name or range)
     local range = blocks[name]
     if range then
         return range.first, range.last, range.description, range.gaps
     end
     name = gsub(name,'"',"0x") -- goodie: tex hex notation
-    local start, stop = match(name,"^(.-)[%-%:](.-)$")
+    local start, stop = lpegmatch(splitter,name)
     if start and stop then
         start, stop = tonumber(start,16) or tonumber(start), tonumber(stop,16) or tonumber(stop)
         if start and stop then
@@ -870,17 +879,92 @@ end
 ----- toupper = Cs((utf8byte/ucchars)^0)
 ----- toshape = Cs((utf8byte/shchars)^0)
 
-local tolower = Cs((utf8char/lcchars)^0)
-local toupper = Cs((utf8char/ucchars)^0)
-local toshape = Cs((utf8char/shchars)^0)
-
-patterns.tolower = tolower
-patterns.toupper = toupper
-patterns.toshape = toshape
+local tolower = Cs((utf8char/lcchars)^0) -- no need to check spacing
+local toupper = Cs((utf8char/ucchars)^0) -- no need to check spacing
+local toshape = Cs((utf8char/shchars)^0) -- no need to check spacing
+
+lpegpatterns.tolower = tolower
+lpegpatterns.toupper = toupper
+lpegpatterns.toshape = toshape
+
+-- function characters.lower (str) return lpegmatch(tolower,str) end
+-- function characters.upper (str) return lpegmatch(toupper,str) end
+-- function characters.shaped(str) return lpegmatch(toshape,str) end
+
+local lhash = { } -- utf character -> utf string made from its lccode
+local uhash = { } -- utf character -> utf string made from its uccode
+local shash = { } -- utf character -> utf string made from its shcode
+
+-- A code entry in characters.data is either a number or a list of numbers. The
+-- previous loop only handled numbers and lists of exactly two codes; any other
+-- list was passed to the inspect debug helper and got no mapping at all. This
+-- helper joins lists of any length; an empty list gives nil, so that no empty
+-- replacement string can end up in one of the hashes.
+
+local function codestostring(c)
+    if type(c) == "number" then
+        return utfchar(c)
+    end
+    local n = #c
+    if n == 0 then
+        return nil
+    end
+    local t = { }
+    for i=1,n do
+        t[i] = utfchar(c[i])
+    end
+    return concat(t)
+end
+
+for k, v in next, characters.data do
+ -- if k < 0x11000 then
+        local l, u, s = v.lccode, v.uccode, v.shcode
+        local c = utfchar(k)
+        if l then
+            lhash[c] = codestostring(l)
+        elseif u then -- as before: the uccode is only consulted when there is no lccode
+            uhash[c] = codestostring(u)
+        end
+        if s then
+            shash[c] = codestostring(s)
+        end
+ -- end
+end
 
-function characters.lower (str) return lpegmatch(tolower,str) end
-function characters.upper (str) return lpegmatch(toupper,str) end
-function characters.shaped(str) return lpegmatch(toshape,str) end
+local utf8lower = Cs((utfchartabletopattern(lhash) / lhash + utf8char)^0)
+local utf8upper = Cs((utfchartabletopattern(uhash) / uhash + utf8char)^0)
+local utf8shape = Cs((utfchartabletopattern(shash) / shash + utf8char)^0)
+
+lpegpatterns.utf8lower = utf8lower
+lpegpatterns.utf8upper = utf8upper
+lpegpatterns.utf8shape = utf8shape
+
+function characters.lower (str) return lpegmatch(utf8lower,str) end
+function characters.upper (str) return lpegmatch(utf8upper,str) end
+function characters.shaped(str) return lpegmatch(utf8shape,str) end
+
+-- local str = [[
+--     ÀÁÂÃÄÅàáâãäå àáâãäåàáâãäå ÀÁÂÃÄÅÀÁÂÃÄÅ AAAAAAaaaaaa
+--     ÆÇæç         æçæç         ÆÇÆÇ         AECaec
+--     ÈÉÊËèéêë     èéêëèéêë     ÈÉÊËÈÉÊË     EEEEeeee
+--     ÌÍÎÏÞìíîïþ   ìíîïþìíîïþ   ÌÍÎÏÞÌÍÎÏÞ   IIIIÞiiiiþ
+--     Ðð           ðð           ÐÐ           Ðð
+--     Ññ           ññ           ÑÑ           Nn
+--     ÒÓÔÕÖòóôõö   òóôõöòóôõö   ÒÓÔÕÖÒÓÔÕÖ   OOOOOooooo
+--     Øø           øø           ØØ           Oo
+--     ÙÚÛÜùúûü     ùúûüùúûü     ÙÚÛÜÙÚÛÜ     UUUUuuuu
+--     Ýýÿ          ýýÿ          ÝÝŸ          Yyy
+--     ß            ß            SS           ss
+--     Ţţ           ţţ           ŢŢ           Tt
+-- ]]
+--
+-- local lower  = characters.lower   print(lower(str))
+-- local upper  = characters.upper   print(upper(str))
+-- local shaped = characters.shaped  print(shaped(str))
+--
+-- local c, n = os.clock(), 10000
+-- for i=1,n do lower(str) upper(str) shaped(str) end -- 2.08 => 0.77
+-- print(os.clock()-c,n*#str*3)
 
 -- maybe: (twice as fast when much ascii)
 --
@@ -929,15 +1013,6 @@ end
 function characters.uccode(n) return uccodes[n] end -- obsolete
 function characters.lccode(n) return lccodes[n] end -- obsolete
 
-function characters.safechar(n)
-    local c = data[n]
-    if c and c.contextname then
-        return "\\" .. c.contextname
-    else
-        return utfchar(n)
-    end
-end
-
 function characters.shape(n)
     local shcode = shcodes[n]
     if not shcode then
@@ -992,36 +1067,36 @@ end
 --     groupdata[group] = gdata
 -- end
 
---~ characters.data, characters.groups = chardata, groupdata
-
---~  [0xF0000]={
---~   category="co",
---~   cjkwd="a",
---~   description="<Plane 0x000F Private Use, First>",
---~   direction="l",
---~   unicodeslot=0xF0000,
---~  },
---~  [0xFFFFD]={
---~   category="co",
---~   cjkwd="a",
---~   description="<Plane 0x000F Private Use, Last>",
---~   direction="l",
---~   unicodeslot=0xFFFFD,
---~  },
---~  [0x100000]={
---~   category="co",
---~   cjkwd="a",
---~   description="<Plane 0x0010 Private Use, First>",
---~   direction="l",
---~   unicodeslot=0x100000,
---~  },
---~  [0x10FFFD]={
---~   category="co",
---~   cjkwd="a",
---~   description="<Plane 0x0010 Private Use, Last>",
---~   direction="l",
---~   unicodeslot=0x10FFFD,
---~  },
+-- characters.data, characters.groups = chardata, groupdata
+
+--  [0xF0000]={
+--   category="co",
+--   cjkwd="a",
+--   description="<Plane 0x000F Private Use, First>",
+--   direction="l",
+--   unicodeslot=0xF0000,
+--  },
+--  [0xFFFFD]={
+--   category="co",
+--   cjkwd="a",
+--   description="<Plane 0x000F Private Use, Last>",
+--   direction="l",
+--   unicodeslot=0xFFFFD,
+--  },
+--  [0x100000]={
+--   category="co",
+--   cjkwd="a",
+--   description="<Plane 0x0010 Private Use, First>",
+--   direction="l",
+--   unicodeslot=0x100000,
+--  },
+--  [0x10FFFD]={
+--   category="co",
+--   cjkwd="a",
+--   description="<Plane 0x0010 Private Use, Last>",
+--   direction="l",
+--   unicodeslot=0x10FFFD,
+--  },
 
 if not characters.superscripts then
 
@@ -1078,259 +1153,6 @@ function characters.showstring(str)
     end
 end
 
--- the following code will move to char-tex.lua
-
--- tex
-
-if not tex or not context or not commands then return characters end
-
-local tex           = tex
-local texsetlccode  = tex.setlccode
-local texsetuccode  = tex.setuccode
-local texsetsfcode  = tex.setsfcode
-local texsetcatcode = tex.setcatcode
-
-local contextsprint = context.sprint
-local ctxcatcodes   = catcodes.numbers.ctxcatcodes
-
---[[ldx--
-<p>Instead of using a <l n='tex'/> file to define the named glyphs, we
-use the table. After all, we have this information available anyway.</p>
---ldx]]--
-
-function commands.makeactive(n,name) --
-    contextsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name))
- -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
-end
-
-function commands.utfchar(c,n)
-    if n then
-     -- contextsprint(c,charfromnumber(n))
-        contextsprint(c,utfchar(n))
-    else
-     -- contextsprint(charfromnumber(c))
-        contextsprint(utfchar(c))
-    end
-end
-
-function commands.safechar(n)
-    local c = data[n]
-    if c and c.contextname then
-        contextsprint("\\" .. c.contextname) -- context[c.contextname]()
-    else
-        contextsprint(utfchar(n))
-    end
-end
-
-tex.uprint = commands.utfchar
-
-local forbidden = tohash { -- at least now
-    0x00A0,
-    0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D,
-    0x202F,
-    0x205F,
- -- 0xFEFF,
-}
-
-function characters.define(tobelettered, tobeactivated) -- catcodetables
-
-    if trace_defining then
-        report_defining("defining active character commands")
-    end
-
-    local activated, a = { }, 0
-
-    for u, chr in next, data do -- these will be commands
-        local fallback = chr.fallback
-        if fallback then
-            contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}")
-            a = a + 1
-            activated[a] = u
-        else
-            local contextname = chr.contextname
-            if contextname then
-                local category = chr.category
-                if is_character[category] then
-                    if chr.unicodeslot < 128 then
-                        if is_letter[category] then
-                            contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s
-                        else
-                            contextsprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) -- has no s
-                        end
-                    else
-                        contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s
-                    end
-                elseif is_command[category] and not forbidden[u] then
-                    contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}")
-                    a = a + 1
-                    activated[a] = u
-                end
-            end
-        end
-    end
-
-    if tobelettered then -- shared
-        local saved = tex.catcodetable
-        for i=1,#tobelettered do
-            tex.catcodetable = tobelettered[i]
-            if trace_defining then
-                report_defining("defining letters (global, shared)")
-            end
-            for u, chr in next, data do
-                if not chr.fallback and is_letter[chr.category] and u >= 128 and u <= 65536 then
-                    texsetcatcode(u,11)
-                end
-                local range = chr.range
-                if range then
-                    for i=1,range.first,range.last do -- tricky as not all are letters
-                        texsetcatcode(i,11)
-                    end
-                end
-            end
-            texsetcatcode(0x200C,11) -- non-joiner
-            texsetcatcode(0x200D,11) -- joiner
-            for k, v in next, blocks do
-                if v.catcode == "letter" then
-                    for i=v.first,v.last do
-                        texsetcatcode(i,11)
-                    end
-                end
-            end
-        end
-        tex.catcodetable = saved
-    end
-
-    local nofactivated = #tobeactivated
-    if tobeactivated and nofactivated > 0 then
-        for i=1,nofactivated do
-            local u = activated[i]
-            if u then
-                report_defining("character %U is active in set %a, containing %a",u,data[u].description,tobeactivated)
-            end
-        end
-        local saved = tex.catcodetable
-        for i=1,#tobeactivated do
-            local vector = tobeactivated[i]
-            if trace_defining then
-                report_defining("defining %a active characters in vector %a",nofactivated,vector)
-            end
-            tex.catcodetable = vector
-            for i=1,nofactivated do
-                local u = activated[i]
-                if u then
-                    texsetcatcode(u,13)
-                end
-            end
-        end
-        tex.catcodetable = saved
-    end
-
-end
-
---[[ldx--
-<p>Setting the lccodes is also done in a loop over the data table.</p>
---ldx]]--
-
-local sfmode = "unset" -- unset, traditional, normal
-
-function characters.setcodes()
-    if trace_defining then
-        report_defining("defining lc and uc codes")
-    end
-    local traditional = sfstate == "traditional" or sfstate == "unset"
-    for code, chr in next, data do
-        local cc = chr.category
-        if is_letter[cc] then
-            local range = chr.range
-            if range then
-                for i=range.first,range.last do
-                    texsetcatcode(i,11) -- letter
-                    texsetlccode(i,i,i) -- self self
-                end
-            else
-                local lc, uc = chr.lccode, chr.uccode
-                if not lc then
-                    chr.lccode, lc = code, code
-                elseif type(lc) == "table" then
-                    lc = code
-                end
-                if not uc then
-                    chr.uccode, uc = code, code
-                elseif type(uc) == "table" then
-                    uc = code
-                end
-                texsetcatcode(code,11)   -- letter
-                texsetlccode(code,lc,uc)
-                if traditional and cc == "lu" then
-                    texsetsfcode(code,999)
-                end
-            end
-        elseif is_mark[cc] then
-            texsetlccode(code,code,code) -- for hyphenation
-        end
-    end
-    if traditional then
-        sfstate = "traditional"
-    end
-end
-
--- If this is something that is not documentwide and used a lot, then we
--- need a more clever approach (trivial but not now).
-
-local function setuppersfcodes(v,n)
-    if sfstate ~= "unset" then
-        report_defining("setting uppercase sf codes to %a",n)
-        for code, chr in next, data do
-            if chr.category == "lu" then
-                texsetsfcode(code,n)
-            end
-        end
-    end
-    sfstate = v
-end
-
-directives.register("characters.spaceafteruppercase",function(v)
-    if v == "traditional" then
-        setuppersfcodes(v,999)
-    elseif v == "normal" then
-        setuppersfcodes(v,1000)
-    end
-end)
-
--- tex
-
-function commands.chardescription(slot)
-    local d = data[slot]
-    if d then
-        context(d.description)
-    end
-end
-
--- xml
-
-characters.activeoffset = 0x10000 -- there will be remapped in that byte range
-
-function commands.remapentity(chr,slot)
-    contextsprint(format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr))
-end
-
--- xml.entities = xml.entities or { }
---
--- storage.register("xml/entities",xml.entities,"xml.entities") -- this will move to lxml
---
--- function characters.setmkiventities()
---     local entities = xml.entities
---     entities.lt  = "<"
---     entities.amp = "&"
---     entities.gt  = ">"
--- end
---
--- function characters.setmkiientities()
---     local entities = xml.entities
---     entities.lt  = utfchar(characters.activeoffset + utfbyte("<"))
---     entities.amp = utfchar(characters.activeoffset + utfbyte("&"))
---     entities.gt  = utfchar(characters.activeoffset + utfbyte(">"))
--- end
+-- code moved to char-tex.lua
 
-commands.definecatcodetable = characters.define
-commands.setcharactercodes  = characters.setcodes
+return characters
diff --git a/tex/context/base/char-ini.mkiv b/tex/context/base/char-ini.mkiv
index db52ae723..4fb63d93e 100644
--- a/tex/context/base/char-ini.mkiv
+++ b/tex/context/base/char-ini.mkiv
@@ -13,9 +13,7 @@
 
 \writestatus{loading}{ConTeXt Character Support / Initialization}
 
-\registerctxluafile{char-def}{1.001} % let's load this one first
-\registerctxluafile{char-ini}{1.001}
-\registerctxluafile{char-cjk}{1.001}
+\registerctxluafile{char-fio}{1.001}
 \registerctxluafile{char-map}{1.001} % maybe we will load this someplace else
 \registerctxluafile{char-tex}{1.001}
 
diff --git a/tex/context/base/char-tex.lua b/tex/context/base/char-tex.lua
index 472cae930..a9a760c7a 100644
--- a/tex/context/base/char-tex.lua
+++ b/tex/context/base/char-tex.lua
@@ -7,16 +7,130 @@ if not modules then modules = { } end modules ['char-tex'] = {
 }
 
 local lpeg = lpeg
+local context = context
+local commands = commands
 
-local find = string.find
+local next, type = next, type
+local format, find, gmatch = string.format, string.find, string.gmatch
+local utfchar, utfbyte = utf.char, utf.byte
+local concat, tohash = table.concat, table.tohash
 local P, C, R, S, V, Cs, Cc = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs, lpeg.Cc
-local U, lpegmatch = lpeg.patterns.utf8, lpeg.match
 
-local allocate, mark = utilities.storage.allocate, utilities.storage.mark
+local lpegpatterns          = lpeg.patterns
+local lpegmatch             = lpeg.match
+local utf8byte              = lpegpatterns.utf8byte
+local utf8char              = lpegpatterns.utf8char
+local utfchartabletopattern = lpeg.utfchartabletopattern
 
-characters       = characters or { }
-local characters = characters
-characters.tex   = characters.tex or { }
+local allocate              = utilities.storage.allocate
+local mark                  = utilities.storage.mark
+
+local characters            = characters
+local texcharacters         = { }
+characters.tex              = texcharacters
+local utffilters            = characters.filters.utf
+
+local is_character          = characters.is_character
+local is_letter             = characters.is_letter
+local is_command            = characters.is_command
+local is_spacing            = characters.is_spacing
+local is_mark               = characters.is_mark
+local is_punctuation        = characters.is_punctuation
+
+local data                  = characters.data  if not data then return end
+local blocks                = characters.blocks
+
+local trace_defining        = false  trackers.register("characters.defining", function(v) trace_defining = v end) -- toggle the local flag (was: a stray global characters_defining)
+
+local report_defining       = logs.reporter("characters")
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+--[[ldx--
+<p>In order to deal with 8-bit output, we need to find a way to go from <l n='utf'/> to
+8-bit. This is handled in the <l n='luatex'/> engine itself.</p>
+
+<p>This leaves us with problems for characters that are special to <l n='tex'/>, like
+<type>{}</type>, <type>$</type> and the like. We can remap the characters that tex input
+files are sensitive to into a private area (while writing to a utility file) and revert
+them to their original slot when we read in such a file. Instead of reverting, we can (when
+we resolve characters to glyphs) map them to their right glyph there. For this purpose
+we can use the private planes 0x0F0000 and 0x100000.</p>
+--ldx]]--
+
+local low     = allocate()
+local high    = allocate()
+local escapes = allocate()
+local special = "~#$%^&_{}\\|" -- "~#$%{}\\|"
+
+local private = {
+    low     = low,
+    high    = high,
+    escapes = escapes,
+}
+
+utffilters.private = private
+
+for ch in gmatch(special,".") do
+    -- gmatch always yields one-character strings here, so the byte value is
+    -- derived directly (the former number branch could never be taken)
+    local cb = utfbyte(ch)
+    if cb < 256 then
+        local relocated = utfchar(0x0F0000 + cb) -- same character, moved to the private plane
+        escapes[ch]     = "\\" .. ch
+        low[ch]         = relocated
+        if ch == "%" then
+            -- nasty, but we need this as in replacements (also in lpeg) % is interpreted
+            ch = "%%"
+        end
+        high[relocated] = ch
+    end
+    -- every character in special is ascii, so the guard above holds for all of them
+end
+
+local tohigh = lpeg.replacer(low)   -- frozen, only for basic tex
+local tolow  = lpeg.replacer(high)  -- frozen, only for basic tex
+
+lpegpatterns.utftohigh = tohigh
+lpegpatterns.utftolow  = tolow
+
+function utffilters.harden(str)
+    return lpegmatch(tohigh,str)
+end
+
+function utffilters.soften(str)
+    return lpegmatch(tolow,str)
+end
+
+private.escape  = utf.remapper(escapes)
+private.replace = utf.remapper(low)
+private.revert  = utf.remapper(high)
+
+--[[ldx--
+<p>We get a more efficient variant of this when we integrate
+replacements in collapser. This more or less renders the previous
+private code redundant. The following code is equivalent but the
+first snippet uses the relocated dollars.</p>
+
+<typing>
+[󰀤x󰀤] [$x$]
+</typing>
+--ldx]]--
+
+-- using the tree-lpeg-mapper would be nice but we also need to deal with end-of-string
+-- cases like "\"\i" and we don't want "\relax" to be seen as "\r e lax" (for which we need
+-- to mess with spaces)
 
 local accentmapping = allocate {
     ['"'] = { [""] = "¨",
@@ -128,7 +242,7 @@ local accentmapping = allocate {
     },
 }
 
-characters.tex.accentmapping = accentmapping
+texcharacters.accentmapping = accentmapping
 
 local accent_map = allocate { -- incomplete
    ['~'] = "̃" , --  ̃ Ẽ
@@ -150,7 +264,7 @@ local accent_map = allocate { -- incomplete
     --  ̰ Ḛ
 }
 
--- local accents = table.concat(table.keys(accentmapping)) -- was _map
+-- local accents = concat(table.keys(accentmapping)) -- was _map
 
 local function remap_accent(a,c,braced)
     local m = accentmapping[a]
@@ -171,7 +285,7 @@ local function remap_accent(a,c,braced)
     end
 end
 
-local command_map = allocate {
+local commandmapping = allocate {
     ["i"]  = "ı",
     ["l"]  = "ł",
     ["ss"] = "ß",
@@ -185,68 +299,125 @@ local command_map = allocate {
     ["AA"] = "Å",
 }
 
--- no need for U here
-
-local achar    = R("az","AZ") + P("ı") + P("\\i")
+texcharacters.commandmapping = commandmapping
 
-local spaces   = P(" ")^0
-local no_l     = P("{") / ""
-local no_r     = P("}") / ""
-local no_b     = P('\\') / ""
+-- local achar    = R("az","AZ") + P("ı") + P("\\i")
+--
+-- local spaces   = P(" ")^0
+-- local no_l     = P("{") / ""
+-- local no_r     = P("}") / ""
+-- local no_b     = P('\\') / ""
+--
+-- local lUr      = P("{") * C(achar) * P("}")
+--
+-- local accents_1 = [["'.=^`~]]
+-- local accents_2 = [[Hckruv]]
+--
+-- local accent   = P('\\') * (
+--     C(S(accents_1)) * (lUr * Cc(true) + C(achar) * Cc(false)) + -- we need achar for ı etc, could be sped up
+--     C(S(accents_2)) *  lUr * Cc(true)
+-- ) / remap_accent
+--
+-- local csname  = P('\\') * C(R("az","AZ")^1)
+--
+-- local command  = (
+--     csname +
+--     P("{") * csname * spaces * P("}")
+-- ) / commandmapping -- remap_commands
+--
+-- local both_1 = Cs { "run",
+--     accent  = accent,
+--     command = command,
+--     run     = (V("accent") + no_l * V("accent") * no_r + V("command") + P(1))^0,
+-- }
+--
+-- local both_2 = Cs { "run",
+--     accent  = accent,
+--     command = command,
+--     run     = (V("accent") + V("command") + no_l * ( V("accent") + V("command") ) * no_r + P(1))^0,
+-- }
+--
+-- function texcharacters.toutf(str,strip)
+--     if not find(str,"\\") then
+--         return str
+--     elseif strip then
+--         return lpegmatch(both_1,str)
+--     else
+--         return lpegmatch(both_2,str)
+--     end
+-- end
 
-local lUr      = P("{") * C(achar) * P("}")
+local untex
 
-local accents_1 = [["'.=^`~]]
-local accents_2 = [[Hckruv]]
+local function toutfpattern()
+    if not untex then
+        local hash = { }
+        for k, v in next, accentmapping do
+            for kk, vv in next, v do
+                if (k >= "a" and k <= "z") or (k >= "A" and k <= "Z") then
+                    hash[ "\\"..k.." "..kk     ] = vv
+                    hash["{\\"..k.." "..kk.."}"] = vv
+                else
+                    hash["\\" ..k     ..kk     ] = vv
+                    hash["{\\"..k     ..kk.."}"] = vv
+                end
+                hash["\\" ..k.."{"..kk.."}" ] = vv
+                hash["{\\"..k.."{"..kk.."}}"] = vv
+            end
+        end
+        for k, v in next, commandmapping do
+            hash["\\"..k.." "] = v
+            hash["{\\"..k.."}"] = v
+            hash["{\\"..k.." }"] = v
+        end
+        untex = utfchartabletopattern(hash) / hash
+    end
+    return untex
+end
 
-local accent   = P('\\') * (
-    C(S(accents_1)) * (lUr * Cc(true) + C(achar) * Cc(false)) + -- we need achar for ı etc, could be sped up
-    C(S(accents_2)) *  lUr * Cc(true)
-) / remap_accent
+texcharacters.toutfpattern = toutfpattern
 
-local csname  = P('\\') * C(R("az","AZ")^1)
+local pattern = nil
 
-local command  = (
-    csname +
-    P("{") * csname * spaces * P("}")
-) / command_map -- remap_commands
+local function prepare()
+    pattern = Cs((toutfpattern() + P(1))^0)
+    return pattern
+end
 
-local both_1 = Cs { "run",
-    accent  = accent,
-    command = command,
-    run     = (V("accent") + no_l * V("accent") * no_r + V("command") + P(1))^0,
-}
+function texcharacters.toutf(str,strip)
+    -- the strip argument is accepted but currently not used
+    if str == "" or not find(str,"\\") then
+        -- empty, or no backslash present: nothing to convert
+        return str
+    end
+    -- the replacement pattern is built on first use
+    local p = pattern or prepare()
+    return lpegmatch(p,str)
+end
 
-local both_2 = Cs { "run",
-    accent  = accent,
-    command = command,
-    run     = (V("accent") + V("command") + no_l * ( V("accent") + V("command") ) * no_r + P(1))^0,
-}
+-- print(texcharacters.toutf([[\~{Z}]],true))
+-- print(texcharacters.toutf([[\'\i]],true))
+-- print(texcharacters.toutf([[\'{\i}]],true))
+-- print(texcharacters.toutf([[\"{e}]],true))
+-- print(texcharacters.toutf([[\" {e}]],true))
+-- print(texcharacters.toutf([[{\"{e}}]],true))
+-- print(texcharacters.toutf([[{\" {e}}]],true))
+-- print(texcharacters.toutf([[{\l}]],true))
+-- print(texcharacters.toutf([[{\l }]],true))
+-- print(texcharacters.toutf([[\v{r}]],true))
+-- print(texcharacters.toutf([[fo{\"o}{\ss}ar]],true))
+-- print(texcharacters.toutf([[H{\'a}n Th\^e\llap{\raise 0.5ex\hbox{\'{\relax}}} Th{\'a}nh]],true))
 
-function characters.tex.toutf(str,strip)
-    if not find(str,"\\") then
-        return str
-    elseif strip then
-        return lpegmatch(both_1,str)
+function texcharacters.safechar(n) -- was characters.safechar
+    local c = data[n]
+    if c and c.contextname then
+        return "\\" .. c.contextname
     else
-        return lpegmatch(both_2,str)
+        return utfchar(n)
     end
 end
 
--- print(characters.tex.toutf([[\~{Z}]],true))
--- print(characters.tex.toutf([[\'\i]],true))
--- print(characters.tex.toutf([[\'{\i}]],true))
--- print(characters.tex.toutf([[\"{e}]],true))
--- print(characters.tex.toutf([[\" {e}]],true))
--- print(characters.tex.toutf([[{\"{e}}]],true))
--- print(characters.tex.toutf([[{\" {e}}]],true))
--- print(characters.tex.toutf([[{\l}]],true))
--- print(characters.tex.toutf([[{\l }]],true))
--- print(characters.tex.toutf([[\v{r}]],true))
--- print(characters.tex.toutf([[fo{\"o}{\ss}ar]],true))
--- print(characters.tex.toutf([[H{\'a}n Th\^e\llap{\raise 0.5ex\hbox{\'{\relax}}} Th{\'a}nh]],true))
-
-function characters.tex.defineaccents()
+function texcharacters.defineaccents()
     for accent, group in next, accentmapping do
         context.dodefineaccentcommand(accent)
         for character, mapping in next, group do
@@ -254,3 +425,256 @@ function characters.tex.defineaccents()
         end
     end
 end
+
+-- all kinds of initializations
+
+local tex           = tex
+local texsetlccode  = tex.setlccode
+local texsetuccode  = tex.setuccode
+local texsetsfcode  = tex.setsfcode
+local texsetcatcode = tex.setcatcode
+
+local contextsprint = context.sprint
+local ctxcatcodes   = catcodes.numbers.ctxcatcodes
+
+--[[ldx--
+<p>Instead of using a <l n='tex'/> file to define the named glyphs, we
+use the table. After all, we have this information available anyway.</p>
+--ldx]]--
+
+function commands.makeactive(n,name) --
+    contextsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name))
+ -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
+end
+
+function commands.utfchar(c,n)
+    if n then
+     -- contextsprint(c,charfromnumber(n))
+        contextsprint(c,utfchar(n))
+    else
+     -- contextsprint(charfromnumber(c))
+        contextsprint(utfchar(c))
+    end
+end
+
+function commands.safechar(n)
+    local c = data[n]
+    if c and c.contextname then
+        contextsprint("\\" .. c.contextname) -- context[c.contextname]()
+    else
+        contextsprint(utfchar(n))
+    end
+end
+
+tex.uprint = commands.utfchar
+
+local forbidden = tohash { -- at least now
+    0x00A0,
+    0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D,
+    0x202F,
+    0x205F,
+ -- 0xFEFF,
+}
+
+function characters.define(tobelettered, tobeactivated) -- catcodetables: two optional lists of catcode table numbers
+
+    if trace_defining then
+        report_defining("defining active character commands")
+    end
+
+    local activated, a = { }, 0 -- code points that got an active definition, and their count
+
+    for u, chr in next, data do -- these will be commands
+        local fallback = chr.fallback
+        if fallback then
+            contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}")
+            a = a + 1
+            activated[a] = u
+        else
+            local contextname = chr.contextname
+            if contextname then
+                local category = chr.category
+                if is_character[category] then
+                    if chr.unicodeslot < 128 then
+                        if is_letter[category] then
+                            contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s
+                        else
+                            contextsprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) -- has no s
+                        end
+                    else
+                        contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s
+                    end
+                elseif is_command[category] and not forbidden[u] then
+                    contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}")
+                    a = a + 1
+                    activated[a] = u
+                end
+            end
+        end
+    end
+
+    if tobelettered then -- shared
+        local saved = tex.catcodetable
+        for i=1,#tobelettered do
+            tex.catcodetable = tobelettered[i]
+            if trace_defining then
+                report_defining("defining letters (global, shared)")
+            end
+            for u, chr in next, data do
+                if not chr.fallback and is_letter[chr.category] and u >= 128 and u <= 65536 then
+                    texsetcatcode(u,11)
+                end
+                local range = chr.range
+                if range then
+                    for i=range.first,range.last do -- whole range (was: 1,first,last, i.e. last used as step); tricky as not all are letters
+                        texsetcatcode(i,11)
+                    end
+                end
+            end
+            texsetcatcode(0x200C,11) -- non-joiner
+            texsetcatcode(0x200D,11) -- joiner
+            for k, v in next, blocks do
+                if v.catcode == "letter" then
+                    for i=v.first,v.last do
+                        texsetcatcode(i,11)
+                    end
+                end
+            end
+        end
+        tex.catcodetable = saved
+    end
+
+    local nofactivated = tobeactivated and #tobeactivated or 0 -- nil-safe: taking the length of a missing argument would raise an error
+    if nofactivated > 0 then -- NOTE(review): this counts vectors, yet it also bounds the loops over 'activated' below; 'a' may be intended
+        for i=1,nofactivated do
+            local u = activated[i]
+            if u then
+                report_defining("character %U is active in set %a, containing %a",u,data[u].description,tobeactivated)
+            end
+        end
+        local saved = tex.catcodetable
+        for i=1,#tobeactivated do
+            local vector = tobeactivated[i]
+            if trace_defining then
+                report_defining("defining %a active characters in vector %a",nofactivated,vector)
+            end
+            tex.catcodetable = vector
+            for i=1,nofactivated do
+                local u = activated[i]
+                if u then
+                    texsetcatcode(u,13)
+                end
+            end
+        end
+        tex.catcodetable = saved -- restore the catcode table that was current on entry
+    end
+
+end
+
+--[[ldx--
+<p>Setting the lccodes is also done in a loop over the data table.</p>
+--ldx]]--
+
+local sfstate = "unset" -- unset, traditional, normal (the name that setcodes and setuppersfcodes actually read and write)
+
+function characters.setcodes()
+    if trace_defining then
+        report_defining("defining lc and uc codes")
+    end
+    local traditional = sfstate == "traditional" or sfstate == "unset"
+    for code, chr in next, data do
+        local cc = chr.category
+        if is_letter[cc] then
+            local range = chr.range
+            if range then
+                for i=range.first,range.last do
+                    texsetcatcode(i,11) -- letter
+                    texsetlccode(i,i,i) -- self self
+                end
+            else
+                local lc, uc = chr.lccode, chr.uccode
+                if not lc then
+                    chr.lccode, lc = code, code
+                elseif type(lc) == "table" then
+                    lc = code
+                end
+                if not uc then
+                    chr.uccode, uc = code, code
+                elseif type(uc) == "table" then
+                    uc = code
+                end
+                texsetcatcode(code,11)   -- letter
+                texsetlccode(code,lc,uc)
+                if traditional and cc == "lu" then
+                    texsetsfcode(code,999)
+                end
+            end
+        elseif is_mark[cc] then
+            texsetlccode(code,code,code) -- for hyphenation
+        end
+    end
+    if traditional then
+        sfstate = "traditional"
+    end
+end
+
+-- If this is something that is not documentwide and used a lot, then we
+-- need a more clever approach (trivial but not now).
+
+local function setuppersfcodes(v,n)
+    if sfstate ~= "unset" then
+        report_defining("setting uppercase sf codes to %a",n)
+        for code, chr in next, data do
+            if chr.category == "lu" then
+                texsetsfcode(code,n)
+            end
+        end
+    end
+    sfstate = v
+end
+
+directives.register("characters.spaceafteruppercase",function(v)
+    if v == "traditional" then
+        setuppersfcodes(v,999)
+    elseif v == "normal" then
+        setuppersfcodes(v,1000)
+    end
+end)
+
+-- tex
+
+function commands.chardescription(slot)
+    local d = data[slot]
+    if d then
+        context(d.description)
+    end
+end
+
+-- xml
+
+characters.activeoffset = 0x10000 -- there will be remapped in that byte range
+
+function commands.remapentity(chr,slot)
+    contextsprint(format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr))
+end
+
+-- xml.entities = xml.entities or { }
+--
+-- storage.register("xml/entities",xml.entities,"xml.entities") -- this will move to lxml
+--
+-- function characters.setmkiventities()
+--     local entities = xml.entities
+--     entities.lt  = "<"
+--     entities.amp = "&"
+--     entities.gt  = ">"
+-- end
+--
+-- function characters.setmkiientities()
+--     local entities = xml.entities
+--     entities.lt  = utfchar(characters.activeoffset + utfbyte("<"))
+--     entities.amp = utfchar(characters.activeoffset + utfbyte("&"))
+--     entities.gt  = utfchar(characters.activeoffset + utfbyte(">"))
+-- end
+
+commands.definecatcodetable = characters.define
+commands.setcharactercodes  = characters.setcodes
diff --git a/tex/context/base/char-utf.lua b/tex/context/base/char-utf.lua
index 98a780dcd..fcd300f6b 100644
--- a/tex/context/base/char-utf.lua
+++ b/tex/context/base/char-utf.lua
@@ -6,11 +6,6 @@ if not modules then modules = { } end modules ['char-utf'] = {
     license   = "see context related readme files"
 }
 
--- todo: trackers
--- todo: no longer special characters (high) here, only needed in special cases and
--- these don't go through this file anyway
--- graphemes: basic symbols
-
 --[[ldx--
 <p>When a sequence of <l n='utf'/> characters enters the application, it may be
 neccessary to collapse subsequences into their composed variant.</p>
@@ -24,44 +19,46 @@ of output (for instance <l n='pdf'/>).</p>
 over a string.</p>
 --ldx]]--
 
-local gmatch, gsub, find = string.gmatch, string.gsub, string.find
+local gsub, find = string.gsub, string.find
 local concat, sortedhash, keys, sort = table.concat, table.sortedhash, table.keys, table.sort
 local utfchar, utfbyte, utfcharacters, utfvalues = utf.char, utf.byte, utf.characters, utf.values
-local allocate = utilities.storage.allocate
-local lpegmatch, lpegpatterns, P, Cs, Cmt, Ct = lpeg.match, lpeg.patterns, lpeg.P, lpeg.Cs, lpeg.Cmt, lpeg.Ct
+local P, Cs, Cmt, Ct = lpeg.P, lpeg.Cs, lpeg.Cmt, lpeg.Ct
+
+if not characters        then require("char-def") end
+if not characters.blocks then require("char-ini") end
 
+local lpegmatch             = lpeg.match
+local lpegpatterns          = lpeg.patterns
 local p_utf8character       = lpegpatterns.utf8character
 local utfchartabletopattern = lpeg.utfchartabletopattern
 
-if not characters then
-    require("char-def")
-end
+local allocate              = utilities.storage.allocate or function() return { } end
 
-local charfromnumber   = characters.fromnumber
+local charfromnumber        = characters.fromnumber
 
-characters             = characters or { }
-local characters       = characters
+characters                  = characters or { }
+local characters            = characters
 
-local graphemes        = allocate()
-characters.graphemes   = graphemes
+local graphemes             = allocate()
+characters.graphemes        = graphemes
 
-local collapsed        = allocate()
-characters.collapsed   = collapsed
+local collapsed             = allocate()
+characters.collapsed        = collapsed
 
-local combined         = allocate()
-characters.combined    = combined
+local combined              = allocate()
+characters.combined         = combined
 
-local decomposed       = allocate()
-characters.decomposed  = decomposed
+local decomposed            = allocate()
+characters.decomposed       = decomposed
 
-local mathpairs        = allocate()
-characters.mathpairs   = mathpairs
+local mathpairs             = allocate()
+characters.mathpairs        = mathpairs
 
-local filters          = allocate()
-characters.filters     = filters
+local filters               = allocate()
+characters.filters          = filters
 
-local utffilters       = { }
-characters.filters.utf = utffilters
+local utffilters            = { }
+characters.filters.utf      = utffilters
 
 -- is characters.combined cached?
 
@@ -221,92 +218,28 @@ end
 characters.initialize = initialize
 
 --[[ldx--
-<p>In order to deal with 8-bit output, we need to find a way to go from <l n='utf'/> to
-8-bit. This is handled in the <l n='luatex'/> engine itself.</p>
-
-<p>This leaves us problems with characters that are specific to <l n='tex'/> like
-<type>{}</type>, <type>$</type> and alike. We can remap some chars that tex input files
-are sensitive for to a private area (while writing to a utility file) and revert then
-to their original slot when we read in such a file. Instead of reverting, we can (when
-we resolve characters to glyphs) map them to their right glyph there. For this purpose
-we can use the private planes 0x0F0000 and 0x100000.</p>
---ldx]]--
-
-local low     = allocate()
-local high    = allocate()
-local escapes = allocate()
-local special = "~#$%^&_{}\\|" -- "~#$%{}\\|"
-
-local private = {
-    low     = low,
-    high    = high,
-    escapes = escapes,
-}
-
-utffilters.private = private
-
-local tohigh = lpeg.replacer(low)   -- frozen, only for basic tex
-local tolow  = lpeg.replacer(high)  -- frozen, only for basic tex
-
-lpegpatterns.utftohigh = tohigh
-lpegpatterns.utftolow  = tolow
-
-function utffilters.harden(str)
-    return lpegmatch(tohigh,str)
-end
-
-function utffilters.soften(str)
-    return lpegmatch(tolow,str)
-end
-
-local function set(ch)
-    local cb
-    if type(ch) == "number" then
-        cb, ch = ch, utfchar(ch)
-    else
-        cb = utfbyte(ch)
-    end
-    if cb < 256 then
-        escapes[ch] = "\\" .. ch
-        low[ch] = utfchar(0x0F0000 + cb)
-        if ch == "%" then
-            ch = "%%" -- nasty, but we need this as in replacements (also in lpeg) % is interpreted
-        end
-        high[utfchar(0x0F0000 + cb)] = ch
-    end
-end
-
-private.set = set
-
--- function private.escape (str) return    gsub(str,"(.)", escapes) end
--- function private.replace(str) return utfgsub(str,"(.)", low    ) end
--- function private.revert (str) return utfgsub(str,"(.)", high   ) end
-
-private.escape  = utf.remapper(escapes)
-private.replace = utf.remapper(low)
-private.revert  = utf.remapper(high)
-
-for ch in gmatch(special,".") do set(ch) end
-
---[[ldx--
-<p>We get a more efficient variant of this when we integrate
-replacements in collapser. This more or less renders the previous
-private code redundant. The following code is equivalent but the
-first snippet uses the relocated dollars.</p>
-
-<typing>
-[󰀤x󰀤] [$x$]
-</typing>
-
 <p>The next variant has lazy token collecting, on a 140 page mk.tex this saves
 about .25 seconds, which is understandable because we have no graphemes and
 not collecting tokens is not only faster but also saves garbage collecting.
 </p>
 --ldx]]--
 
-local skippable  = table.tohash { "mkiv", "mkvi", "mkix", "mkxi" }
+local skippable  = { }
 local filesuffix = file.suffix
 
+function utffilters.setskippable(suffix,value)
+    if value == nil then
+        value = true
+    end
+    if type(suffix) == "table" then
+        for i=1,#suffix do
+            skippable[suffix[i]] = value
+        end
+    else
+        skippable[suffix] = value
+    end
+end
+
 -- function utffilters.collapse(str,filename)   -- we can make high a seperate pass (never needed with collapse)
 --     if skippable[filesuffix(filename)] then
 --         return str
@@ -406,7 +339,7 @@ local filesuffix = file.suffix
 --                 return concat(tokens) -- seldom called
 --             end
 --         elseif nstr > 0 then
---             return high[str] or str -- thsi will go from here
+--             return high[str] or str -- this will go from here
 --         end
 --     end
 --     return str
@@ -420,7 +353,7 @@ local function prepare()
     if initialize then
         initialize()
     end
-    local tree = utfchartabletopattern(keys(collapsed))
+    local tree = utfchartabletopattern(collapsed)
     p_collapse = Cs((tree/collapsed + p_utf8character)^0 * P(-1)) -- the P(1) is needed in order to accept non utf
 end
 
@@ -487,7 +420,7 @@ end
 --         if initialize then
 --             initialize()
 --         end
---         local tree = utfchartabletopattern(keys(decomposed))
+--         local tree = utfchartabletopattern(decomposed)
 --         finder   = lpeg.finder(tree,false,true)
 --         replacer = lpeg.replacer(tree,decomposed,false,true)
 --     end
@@ -503,11 +436,11 @@ local function prepare()
     if initialize then
         initialize()
     end
-    local tree = utfchartabletopattern(keys(decomposed))
+    local tree = utfchartabletopattern(decomposed)
     p_decompose = Cs((tree/decomposed + p_utf8character)^0 * P(-1))
 end
 
-function utffilters.decompose(str) -- 3 to 4 times faster than the above
+function utffilters.decompose(str,filename) -- 3 to 4 times faster than the above
     if not p_decompose then
         prepare()
     end
@@ -619,12 +552,12 @@ local function prepare()
             hash[utfchar(k)] = { utfchar(k), combining, 0 } -- slot 3 can be used in sort
         end
     end
-    local e = utfchartabletopattern(keys(exceptions))
-    local p = utfchartabletopattern(keys(hash))
+    local e = utfchartabletopattern(exceptions)
+    local p = utfchartabletopattern(hash)
     p_reorder = Cs((e/exceptions + Cmt(Ct((p/hash)^2),swapper) + p_utf8character)^0) * P(-1)
 end
 
-function utffilters.reorder(str)
+function utffilters.reorder(str,filename)
     if not p_reorder then
         prepare()
     end
@@ -638,141 +571,6 @@ function utffilters.reorder(str)
     return str
 end
 
--- --
-
-local sequencers = utilities.sequencers
-
-if sequencers then
-
-    local textfileactions = resolvers.openers.helpers.textfileactions
-    local textlineactions = resolvers.openers.helpers.textlineactions
-
-    sequencers.appendaction (textfileactions,"system","characters.filters.utf.reorder")
-    sequencers.disableaction(textfileactions,"characters.filters.utf.reorder")
-
-    sequencers.appendaction (textlineactions,"system","characters.filters.utf.reorder")
-    sequencers.disableaction(textlineactions,"characters.filters.utf.reorder")
-
-    sequencers.appendaction (textfileactions,"system","characters.filters.utf.collapse")
-    sequencers.disableaction(textfileactions,"characters.filters.utf.collapse")
-
-    sequencers.appendaction (textfileactions,"system","characters.filters.utf.decompose")
-    sequencers.disableaction(textfileactions,"characters.filters.utf.decompose")
-
-    function characters.filters.utf.enable()
-        sequencers.enableaction(textfileactions,"characters.filters.utf.reorder")
-        sequencers.enableaction(textfileactions,"characters.filters.utf.collapse")
-        sequencers.enableaction(textfileactions,"characters.filters.utf.decompose")
-    end
-
-    local function configure(what,v)
-        if not v then
-            sequencers.disableaction(textfileactions,what)
-            sequencers.disableaction(textlineactions,what)
-        elseif v == "line" then
-            sequencers.disableaction(textfileactions,what)
-            sequencers.enableaction (textlineactions,what)
-        else -- true or text
-            sequencers.enableaction (textfileactions,what)
-            sequencers.disableaction(textlineactions,what)
-        end
-    end
-
-    directives.register("filters.utf.reorder", function(v)
-        configure("characters.filters.utf.reorder",v)
-    end)
-
-    directives.register("filters.utf.collapse", function(v)
-        configure("characters.filters.utf.collapse",v)
-    end)
-
-    directives.register("filters.utf.decompose", function(v)
-        configure("characters.filters.utf.decompose",v)
-    end)
-
-end
-
--- Faster when we deal with lots of data but somewhat complicated by the fact that we want to be
--- downward compatible .. so maybe some day I'll simplify it. We seldom have large quantities of
--- text.
-
--- local p_processed = nil -- so we can reset if needed
---
--- function utffilters.preprocess(str,filename)
---     if not p_processed then
---         if initialize then
---             initialize()
---         end
---         local merged = table.merged(collapsed,decomposed)
---         local tree   = utfchartabletopattern(keys(merged))
---         p_processed  = Cs((tree/merged     + lpegpatterns.utf8char)^0 * P(-1)) -- the P(1) is needed in order to accept non utf
---         local tree   = utfchartabletopattern(keys(collapsed))
---         p_collapse   = Cs((tree/collapsed  + lpegpatterns.utf8char)^0 * P(-1)) -- the P(1) is needed in order to accept non utf
---         local tree   = utfchartabletopattern(keys(decomposed))
---         p_decompose  = Cs((tree/decomposed + lpegpatterns.utf8char)^0 * P(-1)) -- the P(1) is needed in order to accept non utf
---     end
---     if not str or #str == "" or #str == 1 then
---         return str
---     elseif filename and skippable[filesuffix(filename)] then -- we could hash the collapsables or do a quicker test
---         return str
---     else
---         return lpegmatch(p_processed,str) or str
---     end
--- end
---
--- local sequencers = utilities.sequencers
---
--- if sequencers then
---
---     local textfileactions = resolvers.openers.helpers.textfileactions
---
---     local collapse, decompose = false, false
---
---     sequencers.appendaction (textfileactions,"system","characters.filters.utf.preprocess")
---     sequencers.disableaction(textfileactions,"characters.filters.utf.preprocess")
---
---     local function checkable()
---         if decompose then
---             if collapse then
---                 sequencers.disableaction(textfileactions,"characters.filters.utf.collapse")
---                 sequencers.disableaction(textfileactions,"characters.filters.utf.decompose")
---                 sequencers.enableaction (textfileactions,"characters.filters.utf.preprocess")
---             else
---                 sequencers.disableaction(textfileactions,"characters.filters.utf.collapse")
---                 sequencers.enableaction (textfileactions,"characters.filters.utf.decompose")
---                 sequencers.disableaction(textfileactions,"characters.filters.utf.preprocess")
---             end
---         else
---             if collapse then
---                 sequencers.disableaction(textfileactions,"characters.filters.utf.collapse")
---                 sequencers.disableaction(textfileactions,"characters.filters.utf.decompose")
---                 sequencers.disableaction(textfileactions,"characters.filters.utf.preprocess")
---             else
---                 sequencers.disableaction(textfileactions,"characters.filters.utf.collapse")
---                 sequencers.disableaction(textfileactions,"characters.filters.utf.decompose")
---                 sequencers.disableaction(textfileactions,"characters.filters.utf.preprocess")
---             end
---         end
---     end
---
---     function characters.filters.utf.enable()
---         collapse  = true
---         decompose = true
---         checkable()
---     end
---
---     directives.register("filters.utf.collapse", function(v)
---         collapse = v
---         checkable()
---     end)
---
---     directives.register("filters.utf.decompose", function(v)
---         decompose = v
---         checkable()
---     end)
---
--- end
-
 -- local collapse   = utffilters.collapse
 -- local decompose  = utffilters.decompose
 -- local preprocess = utffilters.preprocess
@@ -815,3 +613,5 @@ end
 -- local done = utffilters.reorder(test)
 --
 -- print(test,done,test==done,false)
+
+return characters
diff --git a/tex/context/base/char-utf.mkiv b/tex/context/base/char-utf.mkiv
index 280e7ef6d..381360905 100644
--- a/tex/context/base/char-utf.mkiv
+++ b/tex/context/base/char-utf.mkiv
@@ -22,22 +22,15 @@
 
 \unprotect
 
+\registerctxluafile{char-def}{1.001}
+\registerctxluafile{char-ini}{1.001}
 \registerctxluafile{char-utf}{1.001}
+\registerctxluafile{char-cjk}{1.001}
 
 %D We enable collapsing (combining characters) by default, but
 %D since the source files are rather simple, we postpone the
 %D initialization till runtime.
 
-% resolvers.filters.install('utf',characters.filters.utf.collapse)
-
-% \appendtoks
-%     \ctxlua{
-%         local textfileactions = resolvers.openers.helpers.textfileactions
-%         utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.collapse")
-%         utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.decompose")
-%     }%
-% \to \everyjob
-
 \appendtoks
     \ctxlua{characters.filters.utf.enable()}%
 \to \everyjob
diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv
index 22bda98b0..c9d8a19e0 100644
--- a/tex/context/base/cont-new.mkiv
+++ b/tex/context/base/cont-new.mkiv
@@ -11,7 +11,7 @@
 %C therefore copyrighted by \PRAGMA. See mreadme.pdf for
 %C details.
 
-\newcontextversion{2014.07.04 15:55}
+\newcontextversion{2014.07.06 21:17}
 
 %D This file is loaded at runtime, thereby providing an excellent place for
 %D hacks, patches, extensions and new features.
diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf
index bb3c1a555..3fddcdb4c 100644
--- a/tex/context/base/context-version.pdf
+++ b/tex/context/base/context-version.pdf
diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv
index f92d65902..468493ce1 100644
--- a/tex/context/base/context.mkiv
+++ b/tex/context/base/context.mkiv
@@ -28,7 +28,7 @@
 %D up and the dependencies are more consistent.
 
 \edef\contextformat {\jobname}
-\edef\contextversion{2014.07.04 15:55}
+\edef\contextversion{2014.07.06 21:17}
 \edef\contextkind   {beta}
 
 %D For those who want to use this:
@@ -112,9 +112,9 @@
 
 \loadmarkfile{supp-dir}
 
-\loadmarkfile{char-ini}
-\loadmarkfile{char-utf}
-\loadmarkfile{char-act}
+\loadmarkfile{char-utf} % generic code (i.e. not much tex) ... could become unic-ini
+\loadmarkfile{char-ini} % tex / context specific
+\loadmarkfile{char-act} % even more specific
 
 \loadmarkfile{mult-ini}
 \loadmarkfile{mult-sys}
diff --git a/tex/context/base/font-enc.lua b/tex/context/base/font-enc.lua
index 5305f0736..2e8b722de 100644
--- a/tex/context/base/font-enc.lua
+++ b/tex/context/base/font-enc.lua
@@ -8,6 +8,7 @@ if not modules then modules = { } end modules ['font-enc'] = {
 
 -- this module is obsolete
 
+local next = next
 local match, gmatch, gsub = string.match, string.gmatch, string.gsub
 
 local setmetatableindex = table.setmetatableindex
@@ -125,7 +126,12 @@ function encodings.make_unicode_vector()
         end
     end
     for name, code in next, characters.synonyms do
-        vector[code], hash[name] = name, code
+        if not vector[code] then
+            vector[code] = name
+        end
+        if not hash[name] then
+            hash[name]   = code
+        end
     end
     return containers.write(encodings.cache, 'unicode', { name='unicode', tag='unicode', vector=vector, hash=hash })
 end
diff --git a/tex/context/base/font-pre.mkiv b/tex/context/base/font-pre.mkiv
index fc6eb289e..cb5b193f6 100644
--- a/tex/context/base/font-pre.mkiv
+++ b/tex/context/base/font-pre.mkiv
@@ -100,14 +100,14 @@
    features=no]
 
 \definefontfeature
-  [semetic-complete]
+  [semitic-complete]
   [mode=node,analyze=yes,language=dflt,ccmp=yes,
    init=yes,medi=yes,fina=yes,isol=yes,
    mark=yes,mkmk=yes,kern=yes,curs=yes,
    liga=yes,dlig=yes,rlig=yes,clig=yes,calt=yes]
 
 \definefontfeature
-  [semetic-simple]
+  [semitic-simple]
   [mode=node,analyze=yes,language=dflt,ccmp=yes,
    init=yes,medi=yes,fina=yes,isol=yes,
    mark=yes,mkmk=yes,kern=yes,curs=yes,
@@ -115,22 +115,22 @@
 
 \definefontfeature
   [arabic]
-  [semetic-complete]
+  [semitic-complete]
   [script=arab]
 
 \definefontfeature
   [hebrew]
-  [semetic-complete]
+  [semitic-complete]
   [script=hebr]
 
 \definefontfeature
   [simplearabic]
-  [semetic-simple]
+  [semitic-simple]
   [script=arab]
 
 \definefontfeature
   [simplehebrew]
-  [semetic-simple]
+  [semitic-simple]
   [script=hebr]
 
 % \definefont [DevaOne] [file:chandas.ttf*devanagari-one at 12pt]
diff --git a/tex/context/base/l-lpeg.lua b/tex/context/base/l-lpeg.lua
index c203d8044..79e75a7b7 100644
--- a/tex/context/base/l-lpeg.lua
+++ b/tex/context/base/l-lpeg.lua
@@ -897,17 +897,35 @@ end
 function lpeg.utfchartabletopattern(list) -- goes to util-lpg
     local tree = { }
     local hash = { }
-    for i=1,#list do
-        local t = tree
-        for c in gmatch(list[i],".") do
-            local tc = t[c]
-            if not tc then
-                tc = { }
-                t[c] = tc
+    local n = #list
+    if n == 0 then
+        -- we could always use this branch
+        for s in next, list do
+            local t = tree
+            for c in gmatch(s,".") do
+                local tc = t[c]
+                if not tc then
+                    tc = { }
+                    t[c] = tc
+                end
+                t = tc
+            end
+            hash[t] = s
+        end
+    else
+        for i=1,n do
+            local t = tree
+            local s = list[i]
+            for c in gmatch(s,".") do
+                local tc = t[c]
+                if not tc then
+                    tc = { }
+                    t[c] = tc
+                end
+                t = tc
             end
-            t = tc
+            hash[t] = s
         end
-        hash[t] = list[i]
     end
     return make(tree,hash)
 end
diff --git a/tex/context/base/publ-aut.lua b/tex/context/base/publ-aut.lua
index b35af1bcc..0167d66e7 100644
--- a/tex/context/base/publ-aut.lua
+++ b/tex/context/base/publ-aut.lua
@@ -233,6 +233,7 @@ local function the_initials(initials,symbol)
 end
 
 local ctx_btxsetconcat        = context.btxsetconcat
+local ctx_btxsetauthorindex   = context.btxsetauthorindex
 local ctx_btxsetoverflow      = context.btxsetoverflow
 local ctx_btxsetinitials      = context.btxsetinitials
 local ctx_btxsetfirstnames    = context.btxsetfirstnames
@@ -248,6 +249,56 @@ local ctx_btxstopauthor       = context.btxstopauthor
 local concatstate = publications.concatstate
 local f_invalid   = formatters["<invalid %s: %s>"]
 
+local currentauthordata   = nil
+local currentauthorsymbol = nil
+
+local manipulators       = typesetters.manipulators
+local splitmanipulation  = manipulators.splitspecification
+local applymanipulation  = manipulators.applyspecification
+local manipulatormethods = manipulators.methods
+
+local function value(i,field)
+    if currentauthordata then
+        local entry = currentauthordata[i]
+        if entry then
+            local value = entry[field]
+            if value and #value > 0 then
+                return value
+            end
+        end
+    end
+end
+
+function commands.btx_a_i(i) local v = value(i,"initials")   if v then context(concat(the_initials(v,currentauthorsymbol or "."))) end end
+function commands.btx_a_f(i) local v = value(i,"firstnames") if v then context(concat(v," ")) end end
+function commands.btx_a_j(i) local v = value(i,"juniors")    if v then context(concat(v," ")) end end
+function commands.btx_a_s(i) local v = value(i,"surnames")   if v then context(concat(v," ")) end end
+function commands.btx_a_v(i) local v = value(i,"vons")       if v then context(concat(v," ")) end end
+
+function commands.btxauthorfield(i,field) -- typesets one field of author i from currentauthordata
+    if currentauthordata then
+        local entry = currentauthordata[i]
+        if entry then
+            local manipulator, field = splitmanipulation(field) -- field may carry a manipulator specification
+            local value = entry[field]
+            if not value or #value == 0 then
+                -- no value, no need for message
+            elseif manipulator then
+                for j=1,#value do -- value is a list: manipulate and flush it item by item
+                    if j > 1 then
+                        context(" ") -- symbol ?
+                    end
+                    context(applymanipulation(manipulator,value[j]) or value[j]) -- was passing the whole list on every pass
+                end
+            elseif field == "initials" then
+                context(concat(the_initials(value,currentauthorsymbol or ".")))
+            else
+                context(concat(value," "))
+            end
+         end
+    end
+end
+
 function commands.btxauthor(dataset,tag,field,settings)
     local ds = datasets[dataset]
     if not ds then
@@ -279,30 +330,32 @@ function commands.btxauthor(dataset,tag,field,settings)
     if max > etallimit and etaldisplay < max then
         max = etaldisplay
     end
+    currentauthordata   = split
+    currentauthorsymbol = symbol
     for i=1,max do
-        ctx_btxstartauthor() -- i, max
+        ctx_btxstartauthor(i,max)
         ctx_btxsetconcat(concatstate(i,max))
         ctx_btxsetauthorvariant(combiner)
         local author = split[i]
         local initials = author.initials
-        if initials then
-            ctx_btxsetinitials(concat(the_initials(initials,symbol)," "))
+        if initials and #initials > 0 then
+            ctx_btxsetinitials() -- (concat(the_initials(initials,symbol)," "))
         end
         local firstnames = author.firstnames
-        if firstnames then
-            ctx_btxsetfirstnames(concat(firstnames," "))
+        if firstnames and #firstnames > 0 then
+            ctx_btxsetfirstnames() -- (concat(firstnames," "))
         end
         local vons = author.vons
-        if vons then
-            ctx_btxsetvons(concat(vons," "))
+        if vons and #vons > 0 then
+            ctx_btxsetvons() -- (concat(vons," "))
         end
         local surnames = author.surnames
-        if surnames then
-            ctx_btxsetsurnames(concat(surnames," "))
+        if surnames and #surnames > 0 then
+            ctx_btxsetsurnames() -- (concat(surnames," "))
         end
         local juniors = author.juniors
-        if juniors then
-            ctx_btxsetjuniors(concat(juniors," "))
+        if juniors and #juniors > 0 then
+            ctx_btxsetjuniors() -- (concat(juniors," "))
         end
         ctx_btxsetup(combiner)
         ctx_btxstopauthor()
@@ -317,6 +370,7 @@ end
 -- pays off.
 
 local compare  = sorters.comparers.basic -- (a,b)
+-- local compare  = sorters.basicsorter -- (a,b)
 local strip    = sorters.strip
 local splitter = sorters.splitters.utf
 
@@ -480,7 +534,7 @@ function authors.sorted(dataset,list,sorttype) -- experimental
     if #valid == 0 or #valid ~= #list then
         return list
     else
-        sorters.sort(valid,compare)
+        sorters.sort(valid,function(a,b) return a ~= b and compare(a,b) == -1 end)
         for i=1,#valid do
             valid[i] = valid[i].index
         end
diff --git a/tex/context/base/publ-imp-author.mkvi b/tex/context/base/publ-imp-author.mkvi
index e21353f63..6326ac3d8 100644
--- a/tex/context/base/publ-imp-author.mkvi
+++ b/tex/context/base/publ-imp-author.mkvi
@@ -24,28 +24,13 @@
 
 % You can adapt these setups to your liking, for instance as:
 
-% \startsetups btx:cite:author:normal
-%     \fastsetup{btx:cite:author:concat}
-%     \ifx\currentbtxfirstnames\empty \else
-%         \begingroup
-%             \bf
-%             \currentbtxfirstnames
-%         \endgroup
-%         \btxcitevariantparameter{firstnamesep}
-%     \fi
-%     \ifx\currentbtxvons\empty \else
-%         \currentbtxvons
-%         \btxcitevariantparameter{vonsep}
-%     \fi
-%     \ifx\currentbtxsurnames\empty \else
-%         \currentbtxsurnames
-%         \ifx\currentbtxjuniors\empty \else
-%             \btxcitevariantparameter{juniorsep}
-%             \currentbtxjuniors
-%         \fi
-%     \fi
-%     \fastsetup{btx:cite:author:etaltext}
-% \stopsetups
+% these can be used instead of the macros and they accept manipulator prefixes
+%
+% \currentbtxinitials   : \btxauthorfield{initials}
+% \currentbtxfirstnames : \btxauthorfield{firstnames}
+% \currentbtxvons       : \btxauthorfield{vons}
+% \currentbtxsurnames   : \btxauthorfield{surnames}
+% \currentbtxjuniors    : \btxauthorfield{juniors}
 
 \startsetups \s!btx:\s!cite:\s!author:concat
     \ifcase\currentbtxconcat \or \or
diff --git a/tex/context/base/publ-ini.mkiv b/tex/context/base/publ-ini.mkiv
index 5f8e335fe..bf8c29363 100644
--- a/tex/context/base/publ-ini.mkiv
+++ b/tex/context/base/publ-ini.mkiv
@@ -318,12 +318,14 @@
 % \let\btxsetdataset\setbtxdataset
 % \let\btxsetentry  \setbtxentry
 
-\def\btxfield   #1{\ctxcommand{btxfield("\currentbtxdataset","\currentbtxtag","#1")}}
-\def\btxdetail  #1{\ctxcommand{btxdetail("\currentbtxdataset","\currentbtxtag","#1")}}
-\def\btxflush   #1{\ctxcommand{btxflush("\currentbtxdataset","\currentbtxtag","#1")}}
-\def\btxdoifelse#1{\ctxcommand{btxdoifelse("\currentbtxdataset","\currentbtxtag","#1")}}
-\def\btxdoif    #1{\ctxcommand{btxdoif("\currentbtxdataset","\currentbtxtag","#1")}}
-\def\btxdoifnot #1{\ctxcommand{btxdoifnot("\currentbtxdataset","\currentbtxtag","#1")}}
+\def\btxfield      #1{\ctxcommand{btxfield("\currentbtxdataset","\currentbtxtag","#1")}}
+\def\btxdetail     #1{\ctxcommand{btxdetail("\currentbtxdataset","\currentbtxtag","#1")}}
+\def\btxauthorfield#1{\ctxcommand{btxauthorfield(\number\currentbtxauthorindex,"#1")}}
+\def\btxflush      #1{\ctxcommand{btxflush("\currentbtxdataset","\currentbtxtag","#1")}}
+\def\btxdoifelse   #1{\ctxcommand{btxdoifelse("\currentbtxdataset","\currentbtxtag","#1")}}
+\def\btxdoif       #1{\ctxcommand{btxdoif("\currentbtxdataset","\currentbtxtag","#1")}}
+\def\btxdoifnot    #1{\ctxcommand{btxdoifnot("\currentbtxdataset","\currentbtxtag","#1")}}
+
 
 \let\btxsetup\fastsetup
 
@@ -353,20 +355,41 @@
 \let\currentbtxcombis       \empty    \unexpanded\def\btxsetcombis       {\def\currentbtxcombis}
 \let\currentbtxdataset      \empty    \unexpanded\def\btxsetdataset      {\def\currentbtxdataset}
 \let\currentbtxfirst        \empty    \unexpanded\def\btxsetfirst        {\def\currentbtxfirst}
-\let\currentbtxfirstnames   \empty    \unexpanded\def\btxsetfirstnames   {\def\currentbtxfirstnames}
-\let\currentbtxinitials     \empty    \unexpanded\def\btxsetinitials     {\def\currentbtxinitials}
 \let\currentbtxinternal     \empty    \unexpanded\def\btxsetinternal     {\def\currentbtxinternal}
-\let\currentbtxjuniors      \empty    \unexpanded\def\btxsetjuniors      {\def\currentbtxjuniors}
 \let\currentbtxlanguage     \empty    \unexpanded\def\btxsetlanguage     {\def\currentbtxlanguage}
 \let\currentbtxsecond       \empty    \unexpanded\def\btxsetsecond       {\def\currentbtxsecond}
-\let\currentbtxsurnames     \empty    \unexpanded\def\btxsetsurnames     {\def\currentbtxsurnames}
 \let\currentbtxtag          \empty    \unexpanded\def\btxsettag          {\def\currentbtxtag}
-\let\currentbtxvons         \empty    \unexpanded\def\btxsetvons         {\def\currentbtxvons}
 \let\currentbtxauthorvariant\v!normal \unexpanded\def\btxsetauthorvariant{\def\currentbtxauthorvariant}
 
-\newconstant\currentbtxoverflow \unexpanded\def\btxsetoverflow#1{\currentbtxoverflow#1\relax}
-\newconstant\currentbtxconcat   \unexpanded\def\btxsetconcat  #1{\currentbtxconcat  #1\relax}
-\newconstant\currentbtxcount    \unexpanded\def\btxsetcount   #1{\currentbtxcount   #1\relax}
+%let\currentbtxfirstnames   \empty    \unexpanded\def\btxsetfirstnames   {\def\currentbtxfirstnames}
+%let\currentbtxinitials     \empty    \unexpanded\def\btxsetinitials     {\def\currentbtxinitials}
+%let\currentbtxjuniors      \empty    \unexpanded\def\btxsetjuniors      {\def\currentbtxjuniors}
+%let\currentbtxsurnames     \empty    \unexpanded\def\btxsetsurnames     {\def\currentbtxsurnames}
+%let\currentbtxvons         \empty    \unexpanded\def\btxsetvons         {\def\currentbtxvons}
+
+%unexpanded\def\getcurrentbtxfirstnames{\ctxcommand{btxauthorfield("firstnames")}
+%unexpanded\def\getcurrentbtxinitials  {\ctxcommand{btxauthorfield("initials")}
+%unexpanded\def\getcurrentbtxjuniors   {\ctxcommand{btxauthorfield("juniors")}
+%unexpanded\def\getcurrentbtxsurnames  {\ctxcommand{btxauthorfield("surnames")}
+%unexpanded\def\getcurrentbtxvons      {\ctxcommand{btxauthorfield("vons")}
+
+\unexpanded\def\currentbtxfirstnames_indeed{\ctxcommand{btx_a_f(\number\currentbtxauthorindex)}}
+\unexpanded\def\currentbtxinitials_indeed  {\ctxcommand{btx_a_i(\number\currentbtxauthorindex)}}
+\unexpanded\def\currentbtxjuniors_indeed   {\ctxcommand{btx_a_j(\number\currentbtxauthorindex)}}
+\unexpanded\def\currentbtxsurnames_indeed  {\ctxcommand{btx_a_s(\number\currentbtxauthorindex)}}
+\unexpanded\def\currentbtxvons_indeed      {\ctxcommand{btx_a_v(\number\currentbtxauthorindex)}}
+
+\let\currentbtxfirstnames   \empty    \unexpanded\def\btxsetfirstnames{\let\currentbtxfirstnames\currentbtxfirstnames_indeed}
+\let\currentbtxinitials     \empty    \unexpanded\def\btxsetinitials  {\let\currentbtxinitials  \currentbtxinitials_indeed  }
+\let\currentbtxjuniors      \empty    \unexpanded\def\btxsetjuniors   {\let\currentbtxjuniors   \currentbtxjuniors_indeed   }
+\let\currentbtxsurnames     \empty    \unexpanded\def\btxsetsurnames  {\let\currentbtxsurnames  \currentbtxsurnames_indeed  }
+\let\currentbtxvons         \empty    \unexpanded\def\btxsetvons      {\let\currentbtxvons      \currentbtxvons_indeed      }
+
+\newconstant\currentbtxoverflow    \unexpanded\def\btxsetoverflow   #1{\currentbtxoverflow   #1\relax}
+\newconstant\currentbtxconcat      \unexpanded\def\btxsetconcat     #1{\currentbtxconcat     #1\relax}
+\newconstant\currentbtxcount       \unexpanded\def\btxsetcount      #1{\currentbtxcount      #1\relax}
+\newconstant\currentbtxauthorindex %unexpanded\def\btxsetauthorindex#1{\currentbtxauthorindex#1\relax} % passed directly
+\newconstant\currentbtxauthorcount %unexpanded\def\btxsetauthorcount#1{\currentbtxauthorcount#1\relax} % passed directly
 
 \def\currentbtxauthorvariant{normal}
 
@@ -381,17 +404,17 @@
    \let\currentbtxdataset  \empty}
 
 \unexpanded\def\btxcitereset % check for less .. not all resets needed
-  {\let        \currentbtxfirst    \empty
-   \let        \currentbtxsecond   \empty
-   \let        \currentbtxinternal \empty
-   \let        \currentbtxbacklink \empty
-   \let        \currentbtxbacktrace\empty % not used here
-   \let        \currentbtxlanguage \empty
-   \let        \currentbtxdataset  \empty
-   \let        \currentbtxtag      \empty
-   \setconstant\currentbtxoverflow \zerocount
-   \setconstant\currentbtxconcat   \zerocount
-   \setconstant\currentbtxcount    \zerocount}
+  {\let        \currentbtxfirst      \empty
+   \let        \currentbtxsecond     \empty
+   \let        \currentbtxinternal   \empty
+   \let        \currentbtxbacklink   \empty
+   \let        \currentbtxbacktrace  \empty % not used here
+   \let        \currentbtxlanguage   \empty
+   \let        \currentbtxdataset    \empty
+   \let        \currentbtxtag        \empty
+   \setconstant\currentbtxoverflow   \zerocount
+   \setconstant\currentbtxconcat     \zerocount
+   \setconstant\currentbtxcount      \zerocount}
 
 %D Tracing
 
@@ -701,8 +724,13 @@
    })}%
    \endgroup}
 
-\unexpanded\def\btxstartauthor{\begingroup}
-\unexpanded\def\btxstopauthor {\endgroup}
+\unexpanded\def\btxstartauthor#1#2%
+  {\begingroup
+   \currentbtxauthorindex#1\relax
+   \currentbtxauthorcount#2\relax}
+
+\unexpanded\def\btxstopauthor
+  {\endgroup}
 
 \unexpanded\def\btxciteauthorsetup#1{\fastsetup{\s!btx:\s!cite:\s!author:#1}}
 \unexpanded\def\btxlistauthorsetup#1{\fastsetup{\s!btx:\s!list:\s!author:#1}}
@@ -950,16 +978,6 @@
 \unexpanded\def\btxcitesetup#1%
   {\fastsetup{\s!btx:\s!cite:#1}} % no \btxcitereset as we loose dataset and such
 
-\unexpanded\def\btxsetfirst      {\def\currentbtxfirst}
-\unexpanded\def\btxsetsecond     {\def\currentbtxsecond}
-\unexpanded\def\btxsettag        {\def\currentbtxtag}
-\unexpanded\def\btxsetdataset    {\def\currentbtxdataset}
-%unexpanded\def\btxsetlanguage   {\def\currentbtxlanguage}
-\unexpanded\def\btxsetinternal   {\def\currentbtxinternal}
-\unexpanded\def\btxsetcount    #1{\setconstant\currentbtxcount   #1\relax}
-\unexpanded\def\btxsetconcat   #1{\setconstant\currentbtxconcat  #1\relax}
-\unexpanded\def\btxsetoverflow #1{\setconstant\currentbtxoverflow#1\relax}
-
 \unexpanded\def\btxstartsubcite#1% #1 can go
   {\begingroup
    \btxcitereset % todo: limited set
diff --git a/tex/context/base/regi-ini.lua b/tex/context/base/regi-ini.lua
index 63f45a0b1..9484db7c7 100644
--- a/tex/context/base/regi-ini.lua
+++ b/tex/context/base/regi-ini.lua
@@ -390,7 +390,7 @@ function regimes.cleanup(regime,str)
                     mapping[split] = v
                 end
             end
-            p = Cs((lpeg.utfchartabletopattern(table.keys(mapping))/mapping+P(1))^0)
+            p = Cs((lpeg.utfchartabletopattern(mapping)/mapping+P(1))^0)
         else
             p = false
         end
diff --git a/tex/context/base/sort-ini.lua b/tex/context/base/sort-ini.lua
index d1eaacd15..ab6ad0649 100644
--- a/tex/context/base/sort-ini.lua
+++ b/tex/context/base/sort-ini.lua
@@ -53,6 +53,7 @@ have language etc properties that then can be used.</p>
 local gsub, rep, sub, sort, concat, tohash, format = string.gsub, string.rep, string.sub, table.sort, table.concat, table.tohash, string.format
 local utfbyte, utfchar, utfcharacters, utfvalues = utf.byte, utf.char, utf.characters, utf.values
 local next, type, tonumber, rawget, rawset = next, type, tonumber, rawget, rawset
+local P, Cs, R, S, lpegmatch = lpeg.P, lpeg.Cs, lpeg.R, lpeg.S, lpeg.match
 
 local allocate          = utilities.storage.allocate
 local setmetatableindex = table.setmetatableindex
@@ -367,6 +368,8 @@ end
 
 -- tricky: { 0, 0, 0 } vs { 0, 0, 0, 0 } => longer wins and mm, pm, zm can have them
 
+-- inlining and checking first slot first doesn't speed up (the 400K complex author sort)
+
 local function basicsort(sort_a,sort_b)
     if sort_a and sort_b then
         local na = #sort_a
@@ -374,12 +377,14 @@ local function basicsort(sort_a,sort_b)
         if na > nb then
             na = nb
         end
-        for i=1,na do
-            local ai, bi = sort_a[i], sort_b[i]
-            if ai > bi then
-                return  1
-            elseif ai < bi then
-                return -1
+        if na > 0 then
+            for i=1,na do
+                local ai, bi = sort_a[i], sort_b[i]
+                if ai > bi then
+                    return  1
+                elseif ai < bi then
+                    return -1
+                end
             end
         end
     end
@@ -389,6 +394,10 @@ end
 -- todo: compile compare function
 
 local function basic(a,b) -- trace ea and eb
+    if a == b then
+        -- hashed (shared) entries
+        return 0
+    end
     local ea, eb = a.split, b.split
     local na, nb = #ea, #eb
     if na == 0 and nb == 0 then
@@ -484,25 +493,59 @@ function sorters.basicsorter(a,b)
     return basic(a,b) == -1
 end
 
+-- local function numify(s)
+--     s = digitsoffset + tonumber(s) -- alternatively we can create range or maybe just hex numbers
+--     if s > digitsmaximum then
+--         s = digitsmaximum
+--     end
+--     return utfchar(s)
+-- end
+--
+-- function sorters.strip(str) -- todo: only letters and such
+--     if str and str ~= "" then
+--         -- todo: make a decent lpeg
+--         str = gsub(str,"\\[\"\'~^`]*","") -- \"e -- hm, too greedy
+--         str = gsub(str,"\\%S*","") -- the rest
+--         str = gsub(str,"%s","\001") -- can be option
+--         str = gsub(str,"[%s%[%](){}%$\"\']*","") -- %s already done
+--         if digits == v_numbers then
+--             str = gsub(str,"(%d+)",numify) -- sort numbers properly
+--         end
+--         return str
+--     else
+--         return ""
+--     end
+-- end
+
 local function numify(s)
-    s = digitsoffset + tonumber(s) -- alternatively we can create range
-    if s > digitsmaximum then
-        s = digitsmaximum
+    -- Replacement callback for a run of digits in the strip pattern. Only
+    -- when number sorting is requested (digits == v_numbers) the run is
+    -- mapped onto one character at digitsoffset + value, clipped to
+    -- digitsmaximum; otherwise the digits are kept as they are. This is
+    -- the same condition under which the former gsub based stripper
+    -- applied numify.
+    if digits ~= v_numbers then
+        return s
+    else
+        s = digitsoffset + tonumber(s) -- alternatively we can create range
+        if s > digitsmaximum then
+            s = digitsmaximum
+        end
+        return utfchar(s)
     end
-    return utfchar(s)
+end
+
+local pattern = nil
+
+-- Builds the substitution pattern used by sorters.strip, stores it in the
+-- upvalue 'pattern' and returns it. It is called on first use only (strip
+-- does 'pattern or prepare()'). The alternatives are tried in the order
+-- given, so the order matters.
+local function prepare()
+    pattern = Cs( (
+        -- NOTE(review): presumably maps tex accent commands onto utf characters, confirm in characters.tex
+        characters.tex.toutfpattern()
+        -- each whitespace match is replaced by a zero byte
+      + lpeg.patterns.whitespace / "\000"
+        -- a backslash, any single character and a following run of ascii letters is dropped
+      + (P("\\") * P(1) * R("az","AZ")^0) / ""
+        -- brackets, parentheses, braces, dollars and quotes are dropped
+      + S("[](){}$\"'") / ""
+        -- a run of digits is handed to numify
+      + R("09")^1 / numify
+        -- anything else is kept as it is
+      + P(1)
+    )^0 )
+    return pattern
 end
 
 function sorters.strip(str) -- todo: only letters and such
     if str and str ~= "" then
-        -- todo: make a decent lpeg
-        str = gsub(str,"\\[\"\'~^`]*","") -- \"e -- hm, too greedy
-        str = gsub(str,"\\%S*","") -- the rest
-        str = gsub(str,"%s","\001") -- can be option
-        str = gsub(str,"[%s%[%](){}%$\"\']*","") -- %s already done
-        if digits == v_numbers then
-            str = gsub(str,"(%d+)",numify) -- sort numbers properly
-        end
-        return str
+        return lpegmatch(pattern or prepare(),str)
     else
         return ""
     end
diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf
index 5bfd7eade..233518f5c 100644
--- a/tex/context/base/status-files.pdf
+++ b/tex/context/base/status-files.pdf
diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf
index 1da58153a..85f8ab47a 100644
--- a/tex/context/base/status-lua.pdf
+++ b/tex/context/base/status-lua.pdf
diff --git a/tex/context/base/x-asciimath.lua b/tex/context/base/x-asciimath.lua
index b3202daa9..0849b42a5 100644
--- a/tex/context/base/x-asciimath.lua
+++ b/tex/context/base/x-asciimath.lua
@@ -829,9 +829,9 @@ local m_right = {
 }
 
 local p_left =
-    lpeg.utfchartabletopattern(keys(m_left)) / m_left
+    lpeg.utfchartabletopattern(m_left) / m_left
 local p_right =
-    lpeg.utfchartabletopattern(keys(m_right)) / m_right
+    lpeg.utfchartabletopattern(m_right) / m_right
 
 -- special cases
 
diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua
index 52a65ea57..22dd8c32b 100644
--- a/tex/generic/context/luatex/luatex-fonts-merged.lua
+++ b/tex/generic/context/luatex/luatex-fonts-merged.lua
@@ -1,6 +1,6 @@
 -- merged file : luatex-fonts-merged.lua
 -- parent file : luatex-fonts.lua
--- merge date  : 07/04/14 15:55:31
+-- merge date  : 07/06/14 21:17:47
 
 do -- begin closure to overcome local limits and interference
 
@@ -665,17 +665,34 @@ end
 function lpeg.utfchartabletopattern(list) 
   local tree={}
   local hash={}
-  for i=1,#list do
-    local t=tree
-    for c in gmatch(list[i],".") do
-      local tc=t[c]
-      if not tc then
-        tc={}
-        t[c]=tc
+  local n=#list
+  if n==0 then
+    for s in next,list do
+      local t=tree
+      for c in gmatch(s,".") do
+        local tc=t[c]
+        if not tc then
+          tc={}
+          t[c]=tc
+        end
+        t=tc
+      end
+      hash[t]=s
+    end
+  else
+    for i=1,n do
+      local t=tree
+      local s=list[i]
+      for c in gmatch(s,".") do
+        local tc=t[c]
+        if not tc then
+          tc={}
+          t[c]=tc
+        end
+        t=tc
       end
-      t=tc
+      hash[t]=s
     end
-    hash[t]=list[i]
   end
   return make(tree,hash)
 end
author	Context Git Mirror Bot <phg42.2a@gmail.com>	2014-07-06 22:15:04 +0200
committer	Context Git Mirror Bot <phg42.2a@gmail.com>	2014-07-06 22:15:04 +0200
commit	9081241531f9d2adb062dd168a184c6c272456d9 (patch)
tree	f08bdb10820f24627450387dfe71ebd73881d714 /tex
parent	5ab3de5b82ca897d811c6f649895cee1dd7e7e56 (diff)
download	context-9081241531f9d2adb062dd168a184c6c272456d9.tar.gz