23 files changed, 1082 insertions, 1002 deletions
diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv
index 51f9ed8a0..b4c6976b4 100644
--- a/tex/context/base/cont-new.mkiv
+++ b/tex/context/base/cont-new.mkiv
@@ -11,7 +11,7 @@
 %C therefore copyrighted by \PRAGMA. See mreadme.pdf for
 %C details.
 
-\newcontextversion{2014.10.03 19:27}
+\newcontextversion{2014.10.06 00:29}
 
 %D This file is loaded at runtime, thereby providing an excellent place for
 %D hacks, patches, extensions and new features.
diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf
index 53e291920..9069b051b 100644
--- a/tex/context/base/context-version.pdf
+++ b/tex/context/base/context-version.pdf
diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv
index 0182e23a2..e76ba90d7 100644
--- a/tex/context/base/context.mkiv
+++ b/tex/context/base/context.mkiv
@@ -28,7 +28,7 @@
 %D up and the dependencies are more consistent.
 
 \edef\contextformat {\jobname}
-\edef\contextversion{2014.10.03 19:27}
+\edef\contextversion{2014.10.06 00:29}
 \edef\contextkind   {beta}
 
 %D For those who want to use this:
diff --git a/tex/context/base/data-tex.lua b/tex/context/base/data-tex.lua
index 04c5ef469..b6b97a0a9 100644
--- a/tex/context/base/data-tex.lua
+++ b/tex/context/base/data-tex.lua
@@ -77,13 +77,13 @@ function helpers.textopener(tag,filename,filehandle,coding)
             report_tex("%a opener: %a opened using method %a",tag,filename,coding)
         end
         if coding == "utf-16-be" then
-            lines = utf.utf16_to_utf8_be(lines)
+            lines = utf.utf16_to_utf8_be_t(lines)
         elseif coding == "utf-16-le" then
-            lines = utf.utf16_to_utf8_le(lines)
+            lines = utf.utf16_to_utf8_le_t(lines)
         elseif coding == "utf-32-be" then
-            lines = utf.utf32_to_utf8_be(lines)
+            lines = utf.utf32_to_utf8_be_t(lines)
         elseif coding == "utf-32-le" then
-            lines = utf.utf32_to_utf8_le(lines)
+            lines = utf.utf32_to_utf8_le_t(lines)
         else -- utf8 or unknown (could be a mkvi file)
             local runner = textfileactions.runner
             if runner then
diff --git a/tex/context/base/font-afm.lua b/tex/context/base/font-afm.lua
index e5c9af759..ca5616a1e 100644
--- a/tex/context/base/font-afm.lua
+++ b/tex/context/base/font-afm.lua
@@ -64,6 +64,8 @@ afm.addligatures         = true -- best leave this set to true
 afm.addtexligatures      = true -- best leave this set to true
 afm.addkerns             = true -- best leave this set to true
 
+local overloads          = fonts.mappings.overloads
+
 local applyruntimefixes  = fonts.treatments and fonts.treatments.applyfixes
 
 local function setmode(tfmdata,value)
@@ -81,16 +83,6 @@ registerafmfeature {
     }
 }
 
-local remappednames = {
-    ff  = { name = "f_f",   unicode = { 0x66, 0x66 } },
-    fi  = { name = "f_i",   unicode = { 0x66, 0x69 } },
-    fj  = { name = "f_j",   unicode = { 0x66, 0x6A } },
-    fk  = { name = "f_k",   unicode = { 0x66, 0x6B } },
-    fl  = { name = "f_l",   unicode = { 0x66, 0x6C } },
-    ffi = { name = "f_f_i", unicode = { 0x66, 0x66, 0x69 } },
-    ffl = { name = "f_f_l", unicode = { 0x66, 0x66, 0x6C } },
-}
-
 --[[ldx--
 <p>We start with the basic reader which we give a name similar to the
 built in <l n='tfm'/> and <l n='otf'/> reader.</p>
@@ -456,12 +448,13 @@ end
 fixnames = function(data)
     for k, v in next, data.descriptions do
         local n = v.name
-        local r = remappednames[n]
+        local r = overloads[n]
         if r then
+            local name = r.name
             if trace_indexing then
-                report_afm("renaming characters %a to %a",n,r.name)
+                report_afm("renaming characters %a to %a",n,name)
             end
-            v.name = r.name
+            v.name    = name
             v.unicode = r.unicode
         end
     end
diff --git a/tex/context/base/font-map.lua b/tex/context/base/font-map.lua
index 309435e0d..890e47d3f 100644
--- a/tex/context/base/font-map.lua
+++ b/tex/context/base/font-map.lua
@@ -6,12 +6,13 @@ if not modules then modules = { } end modules ['font-map'] = {
     license   = "see context related readme files"
 }
 
-local tonumber = tonumber
+local tonumber, next, type = tonumber, next, type
 
 local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower
 local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match
 local utfbyte = utf.byte
 local floor = math.floor
+local formatters = string.formatters
 
 local trace_loading = false  trackers.register("fonts.loading", function(v) trace_loading    = v end)
 local trace_mapping = false  trackers.register("fonts.mapping", function(v) trace_unimapping = v end)
@@ -66,11 +67,14 @@ local function makenameparser(str)
     end
 end
 
+local f_single = formatters["%04X"]
+local f_double = formatters["%04X%04X"]
+
 local function tounicode16(unicode,name)
     if unicode < 0x10000 then
-        return format("%04X",unicode)
+        return f_single(unicode)
     elseif unicode < 0x1FFFFFFFFF then
-        return format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00)
+        return f_double(floor(unicode/1024),unicode%1024+0xDC00)
     else
         report_fonts("can't convert %a in %a into tounicode",unicode,name)
     end
@@ -81,9 +85,9 @@ local function tounicode16sequence(unicodes,name)
     for l=1,#unicodes do
         local u = unicodes[l]
         if u < 0x10000 then
-            t[l] = format("%04X",u)
+            t[l] = f_single(u)
         elseif unicode < 0x1FFFFFFFFF then
-            t[l] = format("%04X%04X",floor(u/1024),u%1024+0xDC00)
+            t[l] = f_double(floor(u/1024),u%1024+0xDC00)
         else
             report_fonts ("can't convert %a in %a into tounicode",u,name)
             return
@@ -98,9 +102,9 @@ local function tounicode(unicode,name)
         for l=1,#unicode do
             local u = unicode[l]
             if u < 0x10000 then
-                t[l] = format("%04X",u)
+                t[l] = f_single(u)
             elseif u < 0x1FFFFFFFFF then
-                t[l] = format("%04X%04X",floor(u/1024),u%1024+0xDC00)
+                t[l] = f_double(floor(u/1024),u%1024+0xDC00)
             else
                 report_fonts ("can't convert %a in %a into tounicode",u,name)
                 return
@@ -109,9 +113,9 @@ local function tounicode(unicode,name)
         return concat(t)
     else
         if unicode < 0x10000 then
-            return format("%04X",unicode)
+            return f_single(unicode)
         elseif unicode < 0x1FFFFFFFFF then
-            return format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00)
+            return f_double(floor(unicode/1024),unicode%1024+0xDC00)
         else
             report_fonts("can't convert %a in %a into tounicode",unicode,name)
         end
@@ -187,321 +191,35 @@ local namesplitter = Ct(C((1 - ligseparator - varseparator)^1) * (ligseparator *
 -- test("such_so_more")
 -- test("such_so_more.that")
 
--- function mappings.addtounicode(data,filename)
---     local resources    = data.resources
---     local properties   = data.properties
---     local descriptions = data.descriptions
---     local unicodes     = resources.unicodes
---     local lookuptypes  = resources.lookuptypes
---     if not unicodes then
---         return
---     end
---     -- we need to move this code
---     unicodes['space']  = unicodes['space']  or 32
---     unicodes['hyphen'] = unicodes['hyphen'] or 45
---     unicodes['zwj']    = unicodes['zwj']    or 0x200D
---     unicodes['zwnj']   = unicodes['zwnj']   or 0x200C
---     -- the tounicode mapping is sparse and only needed for alternatives
---     local private       = fonts.constructors.privateoffset
---     local unknown       = format("%04X",utfbyte("?"))
---     local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context
---     ----- namevector    = fonts.encodings.agl.names    -- loaded runtime in context
---     local tounicode     = { }
---     local originals     = { }
---     local missing       = { }
---     resources.tounicode = tounicode
---     resources.originals = originals
---     local lumunic, uparser, oparser
---     local cidinfo, cidnames, cidcodes, usedmap
---  -- if false then -- will become an option
---  --     lumunic = loadlumtable(filename)
---  --     lumunic = lumunic and lumunic.tounicode
---  -- end
---     --
---     cidinfo = properties.cidinfo
---     usedmap = cidinfo and fonts.cid.getmap(cidinfo)
---     --
---     if usedmap then
---         oparser  = usedmap and makenameparser(cidinfo.ordering)
---         cidnames = usedmap.names
---         cidcodes = usedmap.unicodes
---     end
---     uparser = makenameparser()
---     local ns, nl = 0, 0
---     for unic, glyph in next, descriptions do
---         local index = glyph.index
---         local name  = glyph.name
---         if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
---             local unicode = lumunic and lumunic[name] or unicodevector[name]
---             if unicode then
---                 originals[index] = unicode
---                 tounicode[index] = tounicode16(unicode,name)
---                 ns               = ns + 1
---             end
---             -- cidmap heuristics, beware, there is no guarantee for a match unless
---             -- the chain resolves
---             if (not unicode) and usedmap then
---                 local foundindex = lpegmatch(oparser,name)
---                 if foundindex then
---                     unicode = cidcodes[foundindex] -- name to number
---                     if unicode then
---                         originals[index] = unicode
---                         tounicode[index] = tounicode16(unicode,name)
---                         ns               = ns + 1
---                     else
---                         local reference = cidnames[foundindex] -- number to name
---                         if reference then
---                             local foundindex = lpegmatch(oparser,reference)
---                             if foundindex then
---                                 unicode = cidcodes[foundindex]
---                                 if unicode then
---                                     originals[index] = unicode
---                                     tounicode[index] = tounicode16(unicode,name)
---                                     ns               = ns + 1
---                                 end
---                             end
---                             if not unicode or unicode == "" then
---                                 local foundcodes, multiple = lpegmatch(uparser,reference)
---                                 if foundcodes then
---                                     originals[index] = foundcodes
---                                     if multiple then
---                                         tounicode[index] = tounicode16sequence(foundcodes)
---                                         nl               = nl + 1
---                                         unicode          = true
---                                     else
---                                         tounicode[index] = tounicode16(foundcodes,name)
---                                         ns               = ns + 1
---                                         unicode          = foundcodes
---                                     end
---                                 end
---                             end
---                         end
---                     end
---                 end
---             end
---             -- a.whatever or a_b_c.whatever or a_b_c (no numbers) a.b_
---             --
---             -- It is not trivial to find a solution that suits all fonts. We tried several alternatives
---             -- and this one seems to work reasonable also with fonts that use less standardized naming
---             -- schemes. The extra private test is tested by KE and seems to work okay with non-typical
---             -- fonts as well.
---             --
---             -- The next time I look into this, I'll add an extra analysis step to the otf loader (we can
---             -- resolve some tounicodes by looking into the gsub data tables that are bound to glyphs.
---             --
---             if not unicode or unicode == "" then
---                 local split = lpegmatch(namesplitter,name)
---                 local nsplit = split and #split or 0
---                 local t, n = { }, 0
---                 unicode = true
---                 for l=1,nsplit do
---                     local base = split[l]
---                     local u = unicodes[base] or unicodevector[base]
---                     if not u then
---                         break
---                     elseif type(u) == "table" then
---                         if u[1] >= private then
---                             unicode = false
---                             break
---                         end
---                         n = n + 1
---                         t[n] = u[1]
---                     else
---                         if u >= private then
---                             unicode = false
---                             break
---                         end
---                         n = n + 1
---                         t[n] = u
---                     end
---                 end
---                 if n == 0 then -- done then
---                     -- nothing
---                 elseif n == 1 then
---                     local unicode = t[1]
---                      originals[index] = unicode
---                      tounicode[index] = tounicode16(unicode,name)
---                 else
---                      originals[index] = t
---                      tounicode[index] = tounicode16sequence(t)
---                 end
---                 nl = nl + 1
---             end
---             -- last resort (we might need to catch private here as well)
---             if not unicode or unicode == "" then
---                 local foundcodes, multiple = lpegmatch(uparser,name)
---                 if foundcodes then
---                     if multiple then
---                          originals[index] = foundcodes
---                          tounicode[index] = tounicode16sequence(foundcodes,name)
---                         nl               = nl + 1
---                         unicode          = true
---                     else
---                          originals[index] = foundcodes
---                          tounicode[index] = tounicode16(foundcodes,name)
---                         ns               = ns + 1
---                         unicode          = foundcodes
---                     end
---                 end
---             end
---             -- check using substitutes and alternates
---             --
---             if not unicode then
---                 missing[name] = true
---             end
---          -- if not unicode then
---          --     originals[index] = 0xFFFD
---          --     tounicode[index] = "FFFD"
---          -- end
---         end
---     end
---     if next(missing) then
---         local guess  = { }
---         -- helper
---         local function check(gname,code,unicode)
---             local description = descriptions[code]
---             -- no need to add a self reference
---             local variant = description.name
---             if variant == gname then
---                 return
---             end
---             -- the variant already has a unicode (normally that resultrs in a default tounicode to self)
---             local unic = unicodes[variant]
---             if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
---                 -- no default mapping and therefore maybe no tounicode yet
---             else
---                 return
---             end
---             -- the variant already has a tounicode
---             local index = descriptions[code].index
---             if tounicode[index] then
---                 return
---             end
---             -- add to the list
---             local g = guess[variant]
---             if g then
---                 g[gname] = unicode
---             else
---                 guess[variant] = { [gname] = unicode }
---             end
---         end
---         --
---         for unicode, description in next, descriptions do
---             local slookups = description.slookups
---             if slookups then
---                 local gname = description.name
---                 for tag, data in next, slookups do
---                     local lookuptype = lookuptypes[tag]
---                     if lookuptype == "alternate" then
---                         for i=1,#data do
---                             check(gname,data[i],unicode)
---                         end
---                     elseif lookuptype == "substitution" then
---                         check(gname,data,unicode)
---                     end
---                 end
---             end
---             local mlookups = description.mlookups
---             if mlookups then
---                 local gname = description.name
---                 for tag, list in next, mlookups do
---                     local lookuptype = lookuptypes[tag]
---                     if lookuptype == "alternate" then
---                         for i=1,#list do
---                             local data = list[i]
---                             for i=1,#data do
---                                 check(gname,data[i],unicode)
---                             end
---                         end
---                     elseif lookuptype == "substitution" then
---                         for i=1,#list do
---                             check(gname,list[i],unicode)
---                         end
---                     end
---                 end
---             end
---         end
---         -- resolve references
---         local done = true
---         while done do
---             done = false
---             for k, v in next, guess do
---                 if type(v) ~= "number" then
---                     for kk, vv in next, v do
---                         if vv == -1 or vv >= private or (vv >= 0xE000 and vv <= 0xF8FF) or vv == 0xFFFE or vv == 0xFFFF then
---                             local uu = guess[kk]
---                             if type(uu) == "number" then
---                                 guess[k] = uu
---                                 done = true
---                             end
---                         else
---                             guess[k] = vv
---                             done = true
---                         end
---                     end
---                 end
---             end
---         end
---         -- generate tounicodes
---         for k, v in next, guess do
---             if type(v) == "number" then
---                 guess[k] = tounicode16(v)
---             else
---                 local t = nil
---                 local l = lower(k)
---                 local u = unicodes[l]
---                 if not u then
---                     -- forget about it
---                 elseif u == -1 or u >= private or (u >= 0xE000 and u <= 0xF8FF) or u == 0xFFFE or u == 0xFFFF then
---                     local du = descriptions[u]
---                     local index = du.index
---                     t = tounicode[index]
---                     if t then
---                         tounicode[index] = v
---                         originals[index] = unicode
---                     end
---                 else
---                  -- t = u
---                 end
---                 if t then
---                     guess[k] = t
---                 else
---                     guess[k] = "FFFD"
---                 end
---             end
---         end
---         local orphans = 0
---         local guessed = 0
---         for k, v in next, guess do
---             if v == "FFFD" then
---                 orphans = orphans + 1
---                 guess[k] = false
---             else
---                 guessed = guessed + 1
---                 guess[k] = true
---             end
---         end
---      -- resources.nounicode = guess -- only when we test things
---         if trace_loading and orphans > 0 or guessed > 0 then
---             report_fonts("%s glyphs with no related unicode, %s guessed, %s orphans",guessed+orphans,guessed,orphans)
---         end
---     end
---     if trace_mapping then
---         for unic, glyph in table.sortedhash(descriptions) do
---             local name  = glyph.name
---             local index = glyph.index
---             local toun  = tounicode[index]
---             if toun then
---                 report_fonts("internal slot %U, name %a, unicode %U, tounicode %a",index,name,unic,toun)
---             else
---                 report_fonts("internal slot %U, name %a, unicode %U",index,name,unic)
---             end
---         end
---     end
---     if trace_loading and (ns > 0 or nl > 0) then
---         report_fonts("%s tounicode entries added, ligatures %s",nl+ns,ns)
---     end
--- end
+-- to be completed .. for fonts that use unicodes for ligatures which
+-- is actually a bad thing and should be avoided in the first place
+
+local overloads = {
+    IJ  = { name = "I_J",   unicode = { 0x49, 0x4A },       mess = 0x0132 },
+    ij  = { name = "i_j",   unicode = { 0x69, 0x6A },       mess = 0x0133 },
+    ff  = { name = "f_f",   unicode = { 0x66, 0x66 },       mess = 0xFB00 },
+    fi  = { name = "f_i",   unicode = { 0x66, 0x69 },       mess = 0xFB01 },
+    fl  = { name = "f_l",   unicode = { 0x66, 0x6C },       mess = 0xFB02 },
+    ffi = { name = "f_f_i", unicode = { 0x66, 0x66, 0x69 }, mess = 0xFB03 },
+    ffl = { name = "f_f_l", unicode = { 0x66, 0x66, 0x6C }, mess = 0xFB04 },
+    fj  = { name = "f_j",   unicode = { 0x66, 0x6A } },
+    fk  = { name = "f_k",   unicode = { 0x66, 0x6B } },
+}
+
+require("char-ini")
+
+for k, v in next, overloads do
+    local name = v.name
+    local mess = v.mess
+    if name then
+        overloads[name] = v
+    end
+    if mess then
+        overloads[mess] = v
+    end
+end
+
+mappings.overloads = overloads
 
 function mappings.addtounicode(data,filename)
     local resources    = data.resources
@@ -513,12 +231,11 @@ function mappings.addtounicode(data,filename)
         return
     end
     -- we need to move this code
-    unicodes['space']  = unicodes['space']  or 32
-    unicodes['hyphen'] = unicodes['hyphen'] or 45
-    unicodes['zwj']    = unicodes['zwj']    or 0x200D
-    unicodes['zwnj']   = unicodes['zwnj']   or 0x200C
+    unicodes['space']   = unicodes['space']  or 32
+    unicodes['hyphen']  = unicodes['hyphen'] or 45
+    unicodes['zwj']     = unicodes['zwj']    or 0x200D
+    unicodes['zwnj']    = unicodes['zwnj']   or 0x200C
     local private       = fonts.constructors.privateoffset
-    local unknown       = format("%04X",utfbyte("?"))
     local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context
     ----- namevector    = fonts.encodings.agl.names    -- loaded runtime in context
     local missing       = { }
@@ -538,7 +255,12 @@ function mappings.addtounicode(data,filename)
     for unic, glyph in next, descriptions do
         local index = glyph.index
         local name  = glyph.name
-        if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
+        local r = overloads[name]
+        if r then
+            -- get rid of weird ligatures
+         -- glyph.name    = r.name
+            glyph.unicode = r.unicode
+        elseif unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
             local unicode = lumunic and lumunic[name] or unicodevector[name]
             if unicode then
                 glyph.unicode = unicode
@@ -641,6 +363,11 @@ function mappings.addtounicode(data,filename)
                 end
             end
             -- check using substitutes and alternates
+            local r = overloads[unicode]
+            if r then
+                unicode = r.unicode
+                glyph.unicode = unicode
+            end
             --
             if not unicode then
                 missing[name] = true
@@ -670,6 +397,10 @@ function mappings.addtounicode(data,filename)
             end
             -- add to the list
             local g = guess[variant]
+         -- local r = overloads[unicode]
+         -- if r then
+         --     unicode = r.unicode
+         -- end
             if g then
                 g[gname] = unicode
             else
diff --git a/tex/context/base/font-mis.lua b/tex/context/base/font-mis.lua
index 96d240300..22f4ccc58 100644
--- a/tex/context/base/font-mis.lua
+++ b/tex/context/base/font-mis.lua
@@ -22,7 +22,7 @@ local handlers = fonts.handlers
 handlers.otf   = handlers.otf or { }
 local otf      = handlers.otf
 
-otf.version    = otf.version or 2.801
+otf.version    = otf.version or 2.802
 otf.cache      = otf.cache   or containers.define("fonts", "otf", otf.version, true)
 
 function otf.loadcached(filename,format,sub)
diff --git a/tex/context/base/font-otf.lua b/tex/context/base/font-otf.lua
index 58a72508a..18b975215 100644
--- a/tex/context/base/font-otf.lua
+++ b/tex/context/base/font-otf.lua
@@ -53,7 +53,7 @@ local otf                = fonts.handlers.otf
 
 otf.glists               = { "gsub", "gpos" }
 
-otf.version              = 2.801 -- beware: also sync font-mis.lua
+otf.version              = 2.802 -- beware: also sync font-mis.lua
 otf.cache                = containers.define("fonts", "otf", otf.version, true)
 
 local fontdata           = fonts.hashes.identifiers
diff --git a/tex/context/base/l-lpeg.lua b/tex/context/base/l-lpeg.lua
index f3fd28b1d..f310bc0fe 100644
--- a/tex/context/base/l-lpeg.lua
+++ b/tex/context/base/l-lpeg.lua
@@ -145,6 +145,9 @@ patterns.utfbom_8      = utfbom_8
 patterns.utf_16_be_nl  = P("\000\r\000\n") + P("\000\r") + P("\000\n") -- P("\000\r") * (P("\000\n") + P(true)) + P("\000\n")
 patterns.utf_16_le_nl  = P("\r\000\n\000") + P("\r\000") + P("\n\000") -- P("\r\000") * (P("\n\000") + P(true)) + P("\n\000")
 
+patterns.utf_32_be_nl  = P("\000\000\000\r\000\000\000\n") + P("\000\000\000\r") + P("\000\000\000\n")
+patterns.utf_32_le_nl  = P("\r\000\000\000\n\000\000\000") + P("\r\000\000\000") + P("\n\000\000\000")
+
 patterns.utf8one       = R("\000\127")
 patterns.utf8two       = R("\194\223") * utf8next
 patterns.utf8three     = R("\224\239") * utf8next * utf8next
diff --git a/tex/context/base/l-unicode.lua b/tex/context/base/l-unicode.lua
index 85956308a..b3a4c35e6 100644
--- a/tex/context/base/l-unicode.lua
+++ b/tex/context/base/l-unicode.lua
@@ -56,7 +56,6 @@ local p_utfbom        = patterns.utfbom
 local p_newline       = patterns.newline
 local p_whitespace    = patterns.whitespace
 
-
 if not unicode then
 
     unicode = { utf = utf } -- for a while
@@ -526,7 +525,8 @@ end
 -- end
 
 function utf.remapper(mapping,option) -- static also returns a pattern
-    if type(mapping) == "table" then
+    local variant = type(mapping)
+    if variant == "table" then
         if option == "dynamic" then
             local pattern = false
             table.setmetatablenewindex(mapping,function(t,k,v) rawset(t,k,v) pattern = false end)
@@ -553,6 +553,19 @@ function utf.remapper(mapping,option) -- static also returns a pattern
                 end
             end, pattern
         end
+    elseif variant == "function" then
+        if option == "pattern" then
+            return Cs((p_utf8char/mapping + p_utf8char)^0)
+        else
+            local pattern = Cs((p_utf8char/mapping + p_utf8char)^0)
+            return function(str)
+                if not str or str == "" then
+                    return ""
+                else
+                    return lpegmatch(pattern,str)
+                end
+            end, pattern
+        end
     else
         -- is actually an error
         return function(str)
@@ -669,297 +682,359 @@ end
 local utf16_to_utf8_be, utf16_to_utf8_le
 local utf32_to_utf8_be, utf32_to_utf8_le
 
-local utf_16_be_linesplitter = patterns.utfbom_16_be^-1 * lpeg.tsplitat(patterns.utf_16_be_nl)
-local utf_16_le_linesplitter = patterns.utfbom_16_le^-1 * lpeg.tsplitat(patterns.utf_16_le_nl)
+local utf_16_be_getbom = patterns.utfbom_16_be^-1
+local utf_16_le_getbom = patterns.utfbom_16_le^-1
+local utf_32_be_getbom = patterns.utfbom_32_be^-1
+local utf_32_le_getbom = patterns.utfbom_32_le^-1
+
+local utf_16_be_linesplitter = utf_16_be_getbom * lpeg.tsplitat(patterns.utf_16_be_nl)
+local utf_16_le_linesplitter = utf_16_le_getbom * lpeg.tsplitat(patterns.utf_16_le_nl)
+local utf_32_be_linesplitter = utf_32_be_getbom * lpeg.tsplitat(patterns.utf_32_be_nl)
+local utf_32_le_linesplitter = utf_32_le_getbom * lpeg.tsplitat(patterns.utf_32_le_nl)
+
+-- we have a few possibilities: bytepairs (using tables), gmatch (using tables), gsub and
+-- lpeg. Bytepairs are the fastest, but as soon as we need to remove boms and such the gain
+-- is less due to more testing. Also, we seldom have to convert utf16 so we don't care too
+-- much about a few milliseconds more runtime. The lpeg variant is up to 20% slower but
+-- still pretty fast.
+--
+-- for historic reasons we keep the bytepairs variants around .. beware, they don't grab the
+-- bom like the lpegs do so they're not drop-ins for the functions that follow
+--
+-- utf16_to_utf8_be = function(s)
+--     if not s then
+--         return nil
+--     elseif s == "" then
+--         return ""
+--     end
+--     local result, r, more = { }, 0, 0
+--     for left, right in bytepairs(s) do
+--         if right then
+--             local now = 256*left + right
+--             if more > 0 then
+--                 now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+--                 more = 0
+--                 r = r + 1
+--                 result[r] = utfchar(now)
+--             elseif now >= 0xD800 and now <= 0xDBFF then
+--                 more = now
+--             else
+--                 r = r + 1
+--                 result[r] = utfchar(now)
+--             end
+--         end
+--     end
+--     return concat(result)
+-- end
+--
+-- utf16_to_utf8_be_t = function(t)
+--     if not t then
+--         return nil
+--     elseif type(t) == "string" then
+--         t = lpegmatch(utf_16_be_linesplitter,t)
+--     end
+--     local result = { } -- we reuse result
+--     for i=1,#t do
+--         local s = t[i]
+--         if s ~= "" then
+--             local r, more = 0, 0
+--             for left, right in bytepairs(s) do
+--                 if right then
+--                     local now = 256*left + right
+--                     if more > 0 then
+--                         now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+--                         more = 0
+--                         r = r + 1
+--                         result[r] = utfchar(now)
+--                     elseif now >= 0xD800 and now <= 0xDBFF then
+--                         more = now
+--                     else
+--                         r = r + 1
+--                         result[r] = utfchar(now)
+--                     end
+--                 end
+--             end
+--             t[i] = concat(result,"",1,r) -- we reused tmp, hence t
+--         end
+--     end
+--     return t
+-- end
+--
+-- utf16_to_utf8_le = function(s)
+--     if not s then
+--         return nil
+--     elseif s == "" then
+--         return ""
+--     end
+--     local result, r, more = { }, 0, 0
+--     for left, right in bytepairs(s) do
+--         if right then
+--             local now = 256*right + left
+--             if more > 0 then
+--                 now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+--                 more = 0
+--                 r = r + 1
+--                 result[r] = utfchar(now)
+--             elseif now >= 0xD800 and now <= 0xDBFF then
+--                 more = now
+--             else
+--                 r = r + 1
+--                 result[r] = utfchar(now)
+--             end
+--         end
+--     end
+--     return concat(result)
+-- end
+--
+-- utf16_to_utf8_le_t = function(t)
+--     if not t then
+--         return nil
+--     elseif type(t) == "string" then
+--         t = lpegmatch(utf_16_le_linesplitter,t)
+--     end
+--     local result = { } -- we reuse result
+--     for i=1,#t do
+--         local s = t[i]
+--         if s ~= "" then
+--             local r, more = 0, 0
+--             for left, right in bytepairs(s) do
+--                 if right then
+--                     local now = 256*right + left
+--                     if more > 0 then
+--                         now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+--                         more = 0
+--                         r = r + 1
+--                         result[r] = utfchar(now)
+--                     elseif now >= 0xD800 and now <= 0xDBFF then
+--                         more = now
+--                     else
+--                         r = r + 1
+--                         result[r] = utfchar(now)
+--                     end
+--                 end
+--             end
+--             t[i] = concat(result,"",1,r) -- we reused tmp, hence t
+--         end
+--     end
+--     return t
+-- end
+--
+-- utf32_to_utf8_be_t = function(t)
+--     if not t then
+--         return nil
+--     elseif type(t) == "string" then
+--         t = lpegmatch(utflinesplitter,t)
+--     end
+--     local result = { } -- we reuse result
+--     for i=1,#t do
+--         local r, more = 0, -1
+--         for a,b in bytepairs(t[i]) do
+--             if a and b then
+--                 if more < 0 then
+--                     more = 256*256*256*a + 256*256*b
+--                 else
+--                     r = r + 1
+--                     result[t] = utfchar(more + 256*a + b)
+--                     more = -1
+--                 end
+--             else
+--                 break
+--             end
+--         end
+--         t[i] = concat(result,"",1,r)
+--     end
+--     return t
+-- end
+--
+-- utf32_to_utf8_le_t = function(t)
+--     if not t then
+--         return nil
+--     elseif type(t) == "string" then
+--         t = lpegmatch(utflinesplitter,t)
+--     end
+--     local result = { } -- we reuse result
+--     for i=1,#t do
+--         local r, more = 0, -1
+--         for a,b in bytepairs(t[i]) do
+--             if a and b then
+--                 if more < 0 then
+--                     more = 256*b + a
+--                 else
+--                     r = r + 1
+--                     result[t] = utfchar(more + 256*256*256*b + 256*256*a)
+--                     more = -1
+--                 end
+--             else
+--                 break
+--             end
+--         end
+--         t[i] = concat(result,"",1,r)
+--     end
+--     return t
+-- end
 
--- we have three possibilities:
+local more = 0
+
+local p_utf16_to_utf8_be = C(1) * C(1) /function(left,right) -- one big endian 16 bit unit
+    local now = 256*byte(left) + byte(right)
+    if more > 0 then -- a high surrogate is pending: treat this unit as its low partner
+        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- pairs start at U+10000
+        more = 0
+        return utfchar(now)
+    elseif now >= 0xD800 and now <= 0xDBFF then
+        more = now -- high surrogate: remember it and wait for the next unit
+        return "" -- a capture without value would leave both raw bytes in the Cs result
+    else
+        return utfchar(now)
+    end
+end
+
+local p_utf16_to_utf8_le = C(1) * C(1) /function(right,left) -- one little endian 16 bit unit
+    local now = 256*byte(left) + byte(right)
+    if more > 0 then -- a high surrogate is pending: treat this unit as its low partner
+        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- pairs start at U+10000
+        more = 0
+        return utfchar(now)
+    elseif now >= 0xD800 and now <= 0xDBFF then
+        more = now -- high surrogate: remember it and wait for the next unit
+        return "" -- a capture without value would leave both raw bytes in the Cs result
+    else
+        return utfchar(now)
+    end
+end
+local p_utf32_to_utf8_be = C(1) * C(1) * C(1) * C(1) /function(a,b,c,d)
+    return utfchar(256*256*256*byte(a) + 256*256*byte(b) + 256*byte(c) + byte(d))
+end
 
--- bytepairs: 0.048
--- gmatch   : 0.069
--- lpeg     : 0.089 (match time captures)
+local p_utf32_to_utf8_le = C(1) * C(1) * C(1) * C(1) /function(a,b,c,d)
+    return utfchar(256*256*256*byte(d) + 256*256*byte(c) + 256*byte(b) + byte(a))
+end
 
-if bytepairs then
+p_utf16_to_utf8_be = P(true) / function() more = 0 end * utf_16_be_getbom * Cs(p_utf16_to_utf8_be^0)
+p_utf16_to_utf8_le = P(true) / function() more = 0 end * utf_16_le_getbom * Cs(p_utf16_to_utf8_le^0)
+p_utf32_to_utf8_be = P(true) / function() more = 0 end * utf_32_be_getbom * Cs(p_utf32_to_utf8_be^0)
+p_utf32_to_utf8_le = P(true) / function() more = 0 end * utf_32_le_getbom * Cs(p_utf32_to_utf8_le^0)
 
-    -- with a little bit more code we could include the linesplitter
+patterns.utf16_to_utf8_be = p_utf16_to_utf8_be
+patterns.utf16_to_utf8_le = p_utf16_to_utf8_le
+patterns.utf32_to_utf8_be = p_utf32_to_utf8_be
+patterns.utf32_to_utf8_le = p_utf32_to_utf8_le
 
-    utf16_to_utf8_be = function(t)
-        if not t then
-            return nil
-        elseif type(t) == "string" then
-            t = lpegmatch(utf_16_be_linesplitter,t)
-        end
-        local result = { } -- we reuse result
-        for i=1,#t do
-            local r, more = 0, 0
-            for left, right in bytepairs(t[i]) do
-                if right then
-                    local now = 256*left + right
-                    if more > 0 then
-                        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
-                        more = 0
-                        r = r + 1
-                        result[r] = utfchar(now)
-                    elseif now >= 0xD800 and now <= 0xDBFF then
-                        more = now
-                    else
-                        r = r + 1
-                        result[r] = utfchar(now)
-                    end
-                end
-            end
-            t[i] = concat(result,"",1,r) -- we reused tmp, hence t
-        end
-        return t
+utf16_to_utf8_be = function(s)
+    if s and s ~= "" then
+        return lpegmatch(p_utf16_to_utf8_be,s)
+    else
+        return s
     end
+end
 
-    utf16_to_utf8_le = function(t)
-        if not t then
-            return nil
-        elseif type(t) == "string" then
-            t = lpegmatch(utf_16_le_linesplitter,t)
-        end
-        local result = { } -- we reuse result
-        for i=1,#t do
-            local r, more = 0, 0
-            for left, right in bytepairs(t[i]) do
-                if right then
-                    local now = 256*right + left
-                    if more > 0 then
-                        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
-                        more = 0
-                        r = r + 1
-                        result[r] = utfchar(now)
-                    elseif now >= 0xD800 and now <= 0xDBFF then
-                        more = now
-                    else
-                        r = r + 1
-                        result[r] = utfchar(now)
-                    end
-                end
-            end
-            t[i] = concat(result,"",1,r) -- we reused tmp, hence t
+local utf16_to_utf8_be_t = function(t) -- local: not forward declared, so it would leak as a global
+    if not t then
+        return nil
+    elseif type(t) == "string" then
+        t = lpegmatch(utf_16_be_linesplitter,t) -- strips the bom and splits into lines
+    end
+    for i=1,#t do -- lines are converted in place
+        local s = t[i]
+        if s ~= "" then
+            t[i] = lpegmatch(p_utf16_to_utf8_be,s)
         end
-        return t
     end
+    return t
+end
 
-    utf32_to_utf8_be = function(t)
-        if not t then
-            return nil
-        elseif type(t) == "string" then
-            t = lpegmatch(utflinesplitter,t)
-        end
-        local result = { } -- we reuse result
-        for i=1,#t do
-            local r, more = 0, -1
-            for a,b in bytepairs(t[i]) do
-                if a and b then
-                    if more < 0 then
-                        more = 256*256*256*a + 256*256*b
-                    else
-                        r = r + 1
-                        result[t] = utfchar(more + 256*a + b)
-                        more = -1
-                    end
-                else
-                    break
-                end
-            end
-            t[i] = concat(result,"",1,r)
-        end
-        return t
+utf16_to_utf8_le = function(s)
+    if s and s ~= "" then
+        return lpegmatch(p_utf16_to_utf8_le,s)
+    else
+        return s
     end
+end
 
-    utf32_to_utf8_le = function(t)
-        if not t then
-            return nil
-        elseif type(t) == "string" then
-            t = lpegmatch(utflinesplitter,t)
-        end
-        local result = { } -- we reuse result
-        for i=1,#t do
-            local r, more = 0, -1
-            for a,b in bytepairs(t[i]) do
-                if a and b then
-                    if more < 0 then
-                        more = 256*b + a
-                    else
-                        r = r + 1
-                        result[t] = utfchar(more + 256*256*256*b + 256*256*a)
-                        more = -1
-                    end
-                else
-                    break
-                end
-            end
-            t[i] = concat(result,"",1,r)
+local utf16_to_utf8_le_t = function(t) -- local: not forward declared, so it would leak as a global
+    if not t then
+        return nil
+    elseif type(t) == "string" then
+        t = lpegmatch(utf_16_le_linesplitter,t) -- strips the bom and splits into lines
+    end
+    for i=1,#t do -- lines are converted in place
+        local s = t[i]
+        if s ~= "" then
+            t[i] = lpegmatch(p_utf16_to_utf8_le,s)
         end
-        return t
     end
+    return t
+end
 
-else
-
-    utf16_to_utf8_be = function(t)
-        if not t then
-            return nil
-        elseif type(t) == "string" then
-            t = lpegmatch(utf_16_be_linesplitter,t)
-        end
-        local result = { } -- we reuse result
-        for i=1,#t do
-            local r, more = 0, 0
-            for left, right in gmatch(t[i],"(.)(.)") do
-                if left == "\000" then -- experiment
-                    r = r + 1
-                    result[r] = utfchar(byte(right))
-                elseif right then
-                    local now = 256*byte(left) + byte(right)
-                    if more > 0 then
-                        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
-                        more = 0
-                        r = r + 1
-                        result[r] = utfchar(now)
-                    elseif now >= 0xD800 and now <= 0xDBFF then
-                        more = now
-                    else
-                        r = r + 1
-                        result[r] = utfchar(now)
-                    end
-                end
-            end
-            t[i] = concat(result,"",1,r) -- we reused tmp, hence t
-        end
-        return t
+utf32_to_utf8_be = function(s)
+    if s and s ~= "" then
+        return lpegmatch(p_utf32_to_utf8_be,s)
+    else
+        return s
     end
+end
 
-    utf16_to_utf8_le = function(t)
-        if not t then
-            return nil
-        elseif type(t) == "string" then
-            t = lpegmatch(utf_16_le_linesplitter,t)
-        end
-        local result = { } -- we reuse result
-        for i=1,#t do
-            local r, more = 0, 0
-            for left, right in gmatch(t[i],"(.)(.)") do
-                if right == "\000" then
-                    r = r + 1
-                    result[r] = utfchar(byte(left))
-                elseif right then
-                    local now = 256*byte(right) + byte(left)
-                    if more > 0 then
-                        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
-                        more = 0
-                        r = r + 1
-                        result[r] = utfchar(now)
-                    elseif now >= 0xD800 and now <= 0xDBFF then
-                        more = now
-                    else
-                        r = r + 1
-                        result[r] = utfchar(now)
-                    end
-                end
-            end
-            t[i] = concat(result,"",1,r) -- we reused tmp, hence t
+local utf32_to_utf8_be_t = function(t) -- local: not forward declared, so it would leak as a global
+    if not t then
+        return nil
+    elseif type(t) == "string" then
+        t = lpegmatch(utf_32_be_linesplitter,t) -- strips the bom and splits into lines
+    end
+    for i=1,#t do -- lines are converted in place
+        local s = t[i]
+        if s ~= "" then
+            t[i] = lpegmatch(p_utf32_to_utf8_be,s)
         end
-        return t
     end
+    return t
+end
 
-    utf32_to_utf8_le = function() return { } end -- never used anyway
-    utf32_to_utf8_be = function() return { } end -- never used anyway
-
-    -- the next one is slighty slower
-
-    -- local result, lines, r, more = { }, { }, 0, 0
-    --
-    -- local simple = Cmt(
-    --     C(1) * C(1), function(str,p,left,right)
-    --         local now = 256*byte(left) + byte(right)
-    --         if more > 0 then
-    --             now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
-    --             more = 0
-    --             r = r + 1
-    --             result[r] = utfchar(now)
-    --         elseif now >= 0xD800 and now <= 0xDBFF then
-    --             more = now
-    --         else
-    --             r = r + 1
-    --             result[r] = utfchar(now)
-    --         end
-    --         return p
-    --    end
-    -- )
-    --
-    -- local complex = Cmt(
-    --     C(1) * C(1), function(str,p,left,right)
-    --         local now = 256*byte(left) + byte(right)
-    --         if more > 0 then
-    --             now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
-    --             more = 0
-    --             r = r + 1
-    --             result[r] = utfchar(now)
-    --         elseif now >= 0xD800 and now <= 0xDBFF then
-    --             more = now
-    --         else
-    --             r = r + 1
-    --             result[r] = utfchar(now)
-    --         end
-    --         return p
-    --    end
-    -- )
-    --
-    -- local lineend = Cmt (
-    --     patterns.utf_16_be_nl, function(str,p)
-    --         lines[#lines+1] = concat(result,"",1,r)
-    --         r, more = 0, 0
-    --         return p
-    --     end
-    -- )
-    --
-    -- local be_1 = patterns.utfbom_16_be^-1 * (simple + complex)^0
-    -- local be_2 = patterns.utfbom_16_be^-1 * (lineend + simple + complex)^0
-    --
-    -- utf16_to_utf8_be = function(t)
-    --     if type(t) == "string" then
-    --         local s = t
-    --         lines, r, more = { }, 0, 0
-    --         lpegmatch(be_2,s)
-    --         if r > 0 then
-    --             lines[#lines+1] = concat(result,"",1,r)
-    --         end
-    --         result = { }
-    --         return lines
-    --     else
-    --         for i=1,#t do
-    --             r, more = 0, 0
-    --             lpegmatch(be_1,t[i])
-    --             t[i] = concat(result,"",1,r)
-    --         end
-    --         result = { }
-    --         return t
-    --     end
-    -- end
+utf32_to_utf8_le = function(s)
+    if s and s ~= "" then
+        return lpegmatch(p_utf32_to_utf8_le,s)
+    else
+        return s
+    end
+end
 
+local utf32_to_utf8_le_t = function(t) -- local: not forward declared, so it would leak as a global
+    if not t then
+        return nil
+    elseif type(t) == "string" then
+        t = lpegmatch(utf_32_le_linesplitter,t) -- strips the bom and splits into lines
+    end
+    for i=1,#t do -- lines are converted in place
+        local s = t[i]
+        if s ~= "" then
+            t[i] = lpegmatch(p_utf32_to_utf8_le,s)
+        end
+    end
+    return t
 end
 
-utf.utf16_to_utf8_le = utf16_to_utf8_le
-utf.utf16_to_utf8_be = utf16_to_utf8_be
-utf.utf32_to_utf8_le = utf32_to_utf8_le
-utf.utf32_to_utf8_be = utf32_to_utf8_be
+utf.utf16_to_utf8_le_t = utf16_to_utf8_le_t
+utf.utf16_to_utf8_be_t = utf16_to_utf8_be_t
+utf.utf32_to_utf8_le_t = utf32_to_utf8_le_t
+utf.utf32_to_utf8_be_t = utf32_to_utf8_be_t
 
-function utf.utf8_to_utf8(t)
+utf.utf16_to_utf8_le   = utf16_to_utf8_le
+utf.utf16_to_utf8_be   = utf16_to_utf8_be
+utf.utf32_to_utf8_le   = utf32_to_utf8_le
+utf.utf32_to_utf8_be   = utf32_to_utf8_be
+
+function utf.utf8_to_utf8_t(t)
     return type(t) == "string" and lpegmatch(utflinesplitter,t) or t
 end
 
-function utf.utf16_to_utf8(t,endian)
-    return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t
+function utf.utf16_to_utf8_t(t,endian)
+    return endian and utf16_to_utf8_be_t(t) or utf16_to_utf8_le_t(t) or t
 end
 
-function utf.utf32_to_utf8(t,endian)
-    return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t
+function utf.utf32_to_utf8_t(t,endian)
+    return endian and utf32_to_utf8_be_t(t) or utf32_to_utf8_le_t(t) or t
 end
 
-local function little(c)
-    local b = byte(c)
+local function little(b)
     if b < 0x10000 then
         return char(b%256,b/256)
     else
@@ -969,8 +1044,7 @@ local function little(c)
     end
 end
 
-local function big(c)
-    local b = byte(c)
+local function big(b)
     if b < 0x10000 then
         return char(b/256,b%256)
     else
@@ -980,18 +1054,10 @@ local function big(c)
     end
 end
 
--- function utf.utf8_to_utf16(str,littleendian)
---     if littleendian then
---         return char(255,254) .. utfgsub(str,".",little)
---     else
---         return char(254,255) .. utfgsub(str,".",big)
---     end
--- end
-
-local l_remap = utf.remapper(little,"pattern")
-local b_remap = utf.remapper(big,"pattern")
+local l_remap = Cs((p_utf8byte/little+P(1)/"")^0)
+local b_remap = Cs((p_utf8byte/big   +P(1)/"")^0)
 
-function utf.utf8_to_utf16_be(str,nobom)
+local function utf8_to_utf16_be(str,nobom)
     if nobom then
         return lpegmatch(b_remap,str)
     else
@@ -999,7 +1065,7 @@ function utf.utf8_to_utf16_be(str,nobom)
     end
 end
 
-function utf.utf8_to_utf16_le(str,nobom)
+local function utf8_to_utf16_le(str,nobom)
     if nobom then
         return lpegmatch(l_remap,str)
     else
@@ -1007,11 +1073,14 @@ function utf.utf8_to_utf16_le(str,nobom)
     end
 end
 
+utf.utf8_to_utf16_be = utf8_to_utf16_be
+utf.utf8_to_utf16_le = utf8_to_utf16_le
+
 function utf.utf8_to_utf16(str,littleendian,nobom)
     if littleendian then
-        return utf.utf8_to_utf16_le(str,nobom)
+        return utf8_to_utf16_le(str,nobom)
     else
-        return utf.utf8_to_utf16_be(str,nobom)
+        return utf8_to_utf16_be(str,nobom)
     end
 end
 
@@ -1042,16 +1111,16 @@ function utf.xstring(s)
 end
 
 function utf.toeight(str)
-    if not str then
+    if not str or str == "" then
         return nil
     end
     local utftype = lpegmatch(p_utfstricttype,str)
     if utftype == "utf-8" then
-        return sub(str,4)
-    elseif utftype == "utf-16-le" then
-        return utf16_to_utf8_le(str)
+        return sub(str,4)               -- remove the bom
     elseif utftype == "utf-16-be" then
-        return utf16_to_utf8_ne(str)
+        return utf16_to_utf8_be(str)    -- bom gets removed
+    elseif utftype == "utf-16-le" then
+        return utf16_to_utf8_le(str)    -- bom gets removed
     else
         return str
     end
diff --git a/tex/context/base/lang-url.mkiv b/tex/context/base/lang-url.mkiv
index 8990dccd8..fd3bd3b0d 100644
--- a/tex/context/base/lang-url.mkiv
+++ b/tex/context/base/lang-url.mkiv
@@ -138,3 +138,31 @@
 % \dorecurse{100}{\test{a} \test{ab} \test{abc} \test{abcd} \test{abcde} \test{abcdef}}
 
 \protect \endinput
+
+% \setuppapersize[A7]
+%
+% \unexpanded\def\WhateverA#1%
+%   {\dontleavehmode
+%    \begingroup
+%    \prehyphenchar"B7\relax
+%    \setbox\scratchbox\hbox{\tttf#1}%
+%    \prehyphenchar`-\relax
+%    \unhbox\scratchbox
+%    \endgroup}
+%
+% \unexpanded\def\WhateverB#1%
+%   {\dontleavehmode
+%    \begingroup
+%    \tttf
+%    \prehyphenchar\minusone
+%  % \localrightbox{\llap{_}}%
+%    \localrightbox{\llap{\smash{\lower1.5ex\hbox{\char"2192}}}}%
+%    \setbox\scratchbox\hbox{#1}%
+%    \prehyphenchar`-\relax
+%    \unhbox\scratchbox
+%    \endgroup}
+%
+% \begingroup \hsize1cm
+%   \WhateverA{thisisaboringandverylongcommand}\par
+%   \WhateverB{thisisaboringandverylongcommand}\par
+% \endgroup
diff --git a/tex/context/base/lpdf-epa.lua b/tex/context/base/lpdf-epa.lua
index 5f6969f45..8ca568b76 100644
--- a/tex/context/base/lpdf-epa.lua
+++ b/tex/context/base/lpdf-epa.lua
@@ -253,6 +253,10 @@ end
 
 -- new: for taco
 
+-- Beware, bookmarks can be in pdfdoc encoding or in unicode. However, in mkiv we
+-- write out the strings in unicode (hex). When we read them in, we check for a bom
+-- and convert to utf.
+
 function codeinjections.getbookmarks(filename)
 
     -- The first version built a nested tree and flattened that afterwards ... but I decided
@@ -325,7 +329,8 @@ function codeinjections.getbookmarks(filename)
 
     local function traverse(current,depth)
         while current do
-            local title = current.Title
+         -- local title = current.Title
+            local title = current("Title") -- can be pdfdoc or unicode
             if title then
                 local entry = {
                     level = depth,
diff --git a/tex/context/base/lpdf-epd.lua b/tex/context/base/lpdf-epd.lua
index 17007cdd1..14432d88b 100644
--- a/tex/context/base/lpdf-epd.lua
+++ b/tex/context/base/lpdf-epd.lua
@@ -27,30 +27,19 @@ if not modules then modules = { } end modules ['lpdf-epd'] = {
 -- there was a long standing gc issue the on long runs with including many pages could
 -- crash the analyzer.
 --
--- - we cannot access all destinations in one run.
--- - v:getTypeName(), versus types[v:getType()], the last variant is about twice as fast
---
--- A potential speedup is to use local function instead of colon accessors. This will be done
--- in due time. Normally this code is not really speed sensitive but one never knows.
-
--- __newindex = function(t,k,v)
---     local tk = rawget(t,k)
---     if not tk then
---         local o = epdf.Object()
---         o:initString(v)
---         d:add(k,o)
---     end
---     rawset(t,k,v)
--- end,
+-- Normally a value is fetched by key, as in foo.Title but as it can be in pdfdoc encoding
+-- a safer bet is foo("Title") which will return a decoded string (or the original if it
+-- already was unicode).
 
 local setmetatable, rawset, rawget, type = setmetatable, rawset, rawget, type
 local tostring, tonumber = tostring, tonumber
-local lower, match, char, utfchar = string.lower, string.match, string.char, utf.char
+local lower, match, char, byte, find = string.lower, string.match, string.char, string.byte, string.find
+local abs = math.abs
 local concat = table.concat
-local toutf = string.toutf
+local toutf, toeight, utfchar = string.toutf, utf.toeight, utf.char
 
 local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
-local P, C, S, R, Ct, Cc, V = lpeg.P, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cc, lpeg.V
+local P, C, S, R, Ct, Cc, V, Carg, Cs = lpeg.P, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cc, lpeg.V, lpeg.Carg, lpeg.Cs
 
 local epdf           = epdf
       lpdf           = lpdf or { }
@@ -159,7 +148,20 @@ local checked_access
 
 -- dictionaries (can be optimized: ... resolve and redefine when all locals set)
 
-local function prepare(document,d,t,n,k,mt)
+local frompdfdoc = lpdf.frompdfdoc
+
+local function get_flagged(t,f,k)
+    -- t[k] must be fetched before f[k] is consulted: on first access the
+    -- __index resolver runs and only then are the flags in f filled in
+    local tk = t[k]
+    local fk = f[k]
+    if fk == "rawtext" then
+        return frompdfdoc(tk) -- undecoded string, convert from pdfdoc
+    end
+    return tk -- unflagged, or a flag without special handling (none yet)
+end
+
+local function prepare(document,d,t,n,k,mt,flags)
     for i=1,n do
         local v = dictGetVal(d,i)
         if v then
@@ -174,17 +176,19 @@ local function prepare(document,d,t,n,k,mt)
                         local objnum = getRefNum(r)
                         local cached = document.__cache__[objnum]
                         if not cached then
-                            cached = checked_access[kind](v,document,objnum)
+                            cached = checked_access[kind](v,document,objnum,mt)
                             if c then
                                 document.__cache__[objnum] = cached
                                 document.__xrefs__[cached] = objnum
                             end
                         end
                         t[key] = cached
-                     -- rawset(t,key,cached)
                     else
-                        t[key] = checked_access[kind](v,document)
-                     -- rawset(t,key,checked_access[kind](v,document))
+                        local v, flag = checked_access[kind](v,document)
+                        t[key] = v
+                        if flag then
+                            flags[key] = flag -- flags
+                        end
                     end
                 else
                     report_epdf("warning: nil value for key %a in dictionary",key)
@@ -194,18 +198,26 @@ local function prepare(document,d,t,n,k,mt)
             fatal_error("error: invalid value at index %a in dictionary of %a",i,document.filename)
         end
     end
-    setmetatable(t,mt)
+    if mt then
+        setmetatable(t,mt)
+    else
+        getmetatable(t).__index = nil
+    end
     return t[k]
 end
 
-local function some_dictionary(d,document,r,mt)
+local function some_dictionary(d,document)
     local n = d and dictGetLength(d) or 0
     if n > 0 then
         local t = { }
+        local f = { } -- per key flags, filled when the dictionary gets resolved
         setmetatable(t, {
             __index = function(t,k)
-                return prepare(document,d,t,n,k,mt)
-            end
+                return prepare(document,d,t,n,k,nil,f) -- no mt here; f must land in the flags slot
+            end,
+            __call = function(t,k)
+                return get_flagged(t,f,k)
+            end,
         } )
         return t
     end
@@ -216,9 +228,13 @@ local function get_dictionary(object,document,r,mt)
     local n = d and dictGetLength(d) or 0
     if n > 0 then
         local t = { }
+        local f = { }
         setmetatable(t, {
             __index = function(t,k)
-                return prepare(document,d,t,n,k,mt)
+                return prepare(document,d,t,n,k,mt,f)
+            end,
+            __call = function(t,k)
+                return get_flagged(t,f,k)
             end,
         } )
         return t
@@ -259,7 +275,7 @@ local function prepare(document,a,t,n,k)
     return t[k]
 end
 
-local function some_array(a,document,r)
+local function some_array(a,document)
     local n = a and arrayGetLength(a) or 0
     if n > 0 then
         local t = { n = n }
@@ -272,7 +288,7 @@ local function some_array(a,document,r)
     end
 end
 
-local function get_array(object,document,r)
+local function get_array(object,document)
     local a = getArray(object)
     local n = a and arrayGetLength(a) or 0
     if n > 0 then
@@ -303,17 +319,45 @@ local function streamaccess(s,_,what)
     end
 end
 
-local function get_stream(d,document,r)
+local function get_stream(d,document)
     if d then
         streamReset(d)
-        local s = some_dictionary(streamGetDict(d),document,r)
+        local s = some_dictionary(streamGetDict(d),document)
         getmetatable(s).__call = function(...) return streamaccess(d,...) end
         return s
     end
 end
 
+-- We need to convert the string from utf16 although there is no way to
+-- check if we have a regular string starting with a bom. So, we have
+-- a dilemma here: a pdf doc encoded string can be invalid utf.
+
+-- <hex encoded>   : implicit 0 appended if odd
+-- (byte encoded)  : \( \) \\ escaped
+--
+-- <FE><FF> : utf16be
+--
+-- \n \r \t \b \f \( \) \\ \NNN and \<newline> : append next line
+--
+-- the getString function gives back bytes so we don't need to worry about
+-- the hex aspect.
+
+local pattern = lpeg.patterns.utfbom_16_be * lpeg.patterns.utf16_to_utf8_be
+
 local function get_string(v)
-    return toutf(getString(v))
+    -- the toutf function only converts a utf16 string and leaves the original
+    -- untouched otherwise; one might want to apply lpdf.frompdfdoc to a
+    -- non-unicode string
+    local s = getString(v)
+    if not s or s == "" then
+        return ""
+    end
+    local r = lpegmatch(pattern,s) -- only matches when s starts with a utf16-be bom
+    if r then
+        return r -- converted to utf8
+    else
+        return s, "rawtext" -- unconverted; the second value flags that to the caller
+    end
 end
 
 local function get_null()
@@ -340,7 +384,7 @@ end)
 checked_access[typenumbers.boolean]    = getBool
 checked_access[typenumbers.integer]    = getNum
 checked_access[typenumbers.real]       = getReal
-checked_access[typenumbers.string]     = get_string
+checked_access[typenumbers.string]     = get_string     -- getString
 checked_access[typenumbers.name]       = getName
 checked_access[typenumbers.null]       = get_null
 checked_access[typenumbers.array]      = get_array      -- d,document,r
@@ -551,10 +595,10 @@ end
 lpdf.epdf.expand   = expand
 lpdf.epdf.expanded = expanded
 
--- experiment .. will be finished when there is a real need
+-- we could resolve the text stream in one pass if we directly handle the
+-- font but why should we complicate things
 
 local hexdigit  = R("09","AF")
-local hexword   = hexdigit*hexdigit*hexdigit*hexdigit / function(s) return tonumber(s,16) end
 local numchar   = ( P("\\") * ( (R("09")^3/tonumber) + C(1) ) ) + C(1)
 local number    = lpegpatterns.number / tonumber
 local spaces    = lpegpatterns.whitespace^1
@@ -563,10 +607,10 @@ local operator  = C((R("AZ","az")+P("'")+P('"'))^1)
 
 local grammar   = P { "start",
     start      = (keyword + number + V("dictionary") + V("unicode") + V("string") + V("unicode")+ V("array") + spaces)^1,
-    array      = P("[")  * Ct(V("start")^1)            * P("]"),
-    dictionary = P("<<") * Ct(V("start")^1)            * P(">>"),
-    unicode    = P("<")  * Ct(hexword^1)               * P(">"),
-    string     = P("(")  * Ct((V("string")+numchar)^1) * P(")"), -- untested
+    array      = P("[")  * Ct(V("start")^1) * P("]"),
+    dictionary = P("<<") * Ct(V("start")^1) * P(">>"),
+    unicode    = P("<")  * Ct(Cc("hex") * C((1-P(">"))^1))            * P(">"),
+    string     = P("(")  * Ct(Cc("dec") * C((V("string")+numchar)^1)) * P(")"), -- untested
 }
 
 local operation = Ct(grammar^1 * operator)
@@ -574,26 +618,37 @@ local parser    = Ct((operation + P(1))^1)
 
 -- beginbfrange : <start> <stop> <firstcode>
 --                <start> <stop> [ <firstsequence> <firstsequence> <firstsequence> ]
--- beginbfchar  : <code> <newcode>
+-- beginbfchar  : <code> <newcodes>
+
+local fromsixteen = lpdf.fromsixteen -- maybe inline the lpeg ... but not worth it
+
+local function f_bfchar(t,a,b)
+    t[tonumber(a,16)] = fromsixteen(b)
+end
 
--- todo: utf16 -> 8
--- we could make range more efficient but it's seldom seen anyway
+local function f_bfrange_1(t,a,b,c) -- c: utf16be hex string for code a; its last unit increments for each next code
+    local first, last, unit, prefix = tonumber(a,16), tonumber(b,16), tonumber(string.sub(c,-4),16), string.sub(c,1,-5)
+    if not (first and last and unit) then return end -- malformed entry: skip it instead of raising
+    for i=first,last do t[i] = fromsixteen(prefix .. string.format("%04X",unit+i-first)) end
+end
+
+local function f_bfrange_2(t,a,b,c) -- c: table with one utf16be hex string per code, starting at code a
+    local first = tonumber(a,16)
+    if not first then return end -- malformed start code: skip it instead of raising
+    for i=1,#c do t[first+i-1] = fromsixteen(c[i]) end
+end
 
 local optionals   = spaces^0
-local whatever    = optionals * P("<") * hexword       * P(">")
-local hexstring   = optionals * P("<") * C(hexdigit^1) * P(">")
-local bfchar      = Cc(1) * whatever * whatever
-local bfrange     = Cc(2) * whatever * whatever * whatever
-                  + Cc(3) * whatever * whatever * optionals * P("[") * hexstring^1 * optionals * P("]")
-local fromunicode = Ct ( (
-    P("beginbfchar" ) * Ct(bfchar )^1 * optionals * P("endbfchar" ) +
-    P("beginbfrange") * Ct(bfrange)^1 * optionals * P("endbfrange") +
+local hexstring   = optionals * P("<") * C((1-P(">"))^1) * P(">")
+local bfchar      = Carg(1) * hexstring * hexstring / f_bfchar
+local bfrange     = Carg(1) * hexstring * hexstring * hexstring / f_bfrange_1
+                  + Carg(1) * hexstring * hexstring * optionals * P("[") * Ct(hexstring^1) * optionals * P("]") / f_bfrange_2
+local fromunicode = (
+    P("beginbfchar" ) * bfchar ^1 * optionals * P("endbfchar" ) +
+    P("beginbfrange") * bfrange^1 * optionals * P("endbfrange") +
     spaces +
     P(1)
-)^1 )
-
-local utf16_to_utf8_be = utf.utf16_to_utf8_be
-local utfchar           = utfchar
+)^1  * Carg(1)
 
 local function analyzefonts(document,resources) -- unfinished
     local fonts = document.__fonts__
@@ -606,37 +661,12 @@ local function analyzefonts(document,resources) -- unfinished
                     -- -application for it
                     local tounicode = data.ToUnicode()
                     if tounicode then
-                        tounicode = lpegmatch(fromunicode,tounicode)
-                    end
-                    if type(tounicode) == "table" then
-                        local t = { }
-                        for i=1,#tounicode do
-                            local u = tounicode[i]
-                            local w = u[1]
-                            if w == 1 then
-                                t[u[2]] = utfchar(u[3])
-                            elseif w == 2 then
-                                local m = u[4]
-                                for i=u[2],u[3] do
-                                    t[i] = utfchar(m)
-                                    m = m + 1
-                                end
-                            elseif w == 3 then
-                                local m = 4
-                                for i=u[2],u[3] do
-                                    t[i] = utf16_to_utf8_be(u[m])
-                                    m = m + 1
-                                end
-                            end
-                        end
-                        fonts[id] = {
-                            tounicode = t
-                        }
-                    else
-                        fonts[id] = {
-                            tounicode = { }
-                        }
+                        tounicode = lpegmatch(fromunicode,tounicode,1,{})
                     end
+                    fonts[id] = {
+                        tounicode = type(tounicode) == "table" and tounicode or { }
+                    }
+                    table.setmetatableindex(fonts[id],"self")
                 end
             end
         end
@@ -644,6 +674,31 @@ local function analyzefonts(document,resources) -- unfinished
     return fonts
 end
 
+local more = 0
+local unic = nil -- cheaper than passing each time as Carg(1)
+
+local p_hex_to_utf = C(4) / function(s) -- handles one 16 bit unit given as four hex digits
+    local now = tonumber(s,16)
+    if more > 0 then
+        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- combine pending high surrogate with this low one
+        more = 0
+        return unic[now] or utfchar(now)
+    elseif now >= 0xD800 and now <= 0xDBFF then
+        more = now -- high surrogate: remember it and wait for the low one
+        return "" -- a capture without value would leave the four hex digits in the Cs result
+    else
+        return unic[now] or utfchar(now)
+    end
+end
+
+local p_dec_to_utf = C(1) / function(s) -- needs checking !
+    local now = byte(s)
+    return unic[now] or utfchar(now)
+end
+
+local p_hex_to_utf = P(true) / function() more = 0 end * Cs(p_hex_to_utf^1)
+local p_dec_to_utf = P(true) / function() more = 0 end * Cs(p_dec_to_utf^1)
+
 function lpdf.epdf.getpagecontent(document,pagenumber)
 
     local page = document.pages[pagenumber]
@@ -657,7 +712,7 @@ function lpdf.epdf.getpagecontent(document,pagenumber)
     local content = page.Contents() or ""
     local list    = lpegmatch(parser,content)
     local font    = nil
-    local unic    = nil
+ -- local unic    = nil
 
     for i=1,#list do
         local entry    = list[i]
@@ -671,55 +726,85 @@ function lpdf.epdf.getpagecontent(document,pagenumber)
             for i=1,#list do
                 local li = list[i]
                 if type(li) == "table" then
-                    for i=1,#li do
-                        local c = li[i]
-                        local u = unic[c]
-                        li[i] = u or utfchar(c)
+                    if li[1] == "hex" then
+                        list[i] = lpegmatch(p_hex_to_utf,li[2])
+                    else
+                        list[i] = lpegmatch(p_dec_to_utf,li[2])
                     end
-                    list[i] = concat(li)
+                else
+                    -- kern
                 end
             end
         elseif operator == "Tj" or operator == "'" or operator == '"' then -- { string,  Tj } { string, ' } { n, m, string, " }
-            local li = entry[size-1]
-            for i=1,#li do
-                local c = li[i]
-                local u = unic[c]
-                li[i] = utfchar(u or c)
+            -- the operand is a { "hex"|"dec", raw } pair (see grammar); replace it by the
+            -- converted string, just like the TJ branch does with the entries of its array
+            local li = entry[size-1]
+            if type(li) == "table" then
+                entry[size-1] = lpegmatch(li[1] == "hex" and p_hex_to_utf or p_dec_to_utf,li[2]) or li[2]
             end
-            entry[1] = concat(li)
         end
     end
 
- -- for i=1,#list do
- --     local entry    = list[i]
- --     local size     = #entry
- --     local operator = entry[size]
- --     if operator == "TJ" then -- { array,  TJ }
- --         local list = entry[1]
- --         for i=1,#list do
- --             local li = list[i]
- --             if type(li) == "string" then
- --                 --
- --             elseif li < -50 then
- --                 list[i] = " "
- --             else
- --                 list[i] = ""
- --             end
- --         end
- --         entry[1] = concat(list)
- --     elseif operator == "Tf" then
- --         -- already concat
- --     elseif operator == "cm" then
- --         local e = entry[1]
- --         local sx, rx, ry, sy, tx, ty = e[1], e[2], e[3], e[4], e[5], e[6]
- --         -- if dy ... newline
- --     end
- -- end
+    unic = nil -- can be collected
 
     return list
 
 end
 
+-- This is also an experiment. When I really need it I can improve it, for instance
+-- with proper position calculating. It might be useful for some search or so.
+
+local softhyphen = utfchar(0xAD) .. "$"
+local linefactor = 1.3
+
+function lpdf.epdf.contenttotext(document,list) -- list: result of getpagecontent; returns one string
+    local last_y = 0   -- vertical position seen in the previous cm/Tm
+    local last_f = 0   -- second operand of the last Tf (used as font size)
+    local text   = { } -- collected snippets, concatenated at the end
+    local last   = 0   -- number of entries in text
+
+    for i=1,#list do
+        local entry    = list[i]
+        local size     = #entry
+        local operator = entry[size]
+        if operator == "Tf" then
+            last_f = entry[2]
+        elseif operator == "TJ" then
+            local list = entry[1]
+            for i=1,#list do
+                local li = list[i]
+                if type(li) == "string" then
+                    last = last + 1
+                    text[last] = li
+                elseif type(li) == "number" and li < -50 then -- a large enough kern counts as a space
+                    last = last + 1
+                    text[last] = " "
+                end
+            end
+        elseif operator == "Tj" then
+            local s = entry[size-1] -- can still be a { kind, text } pair instead of a string
+            last = last + 1
+            text[last] = type(s) == "table" and (s[2] or "") or s or ""
+        elseif operator == "cm" or operator == "Tm" then
+            local ty = entry[6]
+            local dy = abs(last_y - ty)
+            if dy > linefactor*last_f then -- moved more than a line: start a new line
+                if last > 0 then
+                    if find(text[last],softhyphen) then
+                        -- ignore
+                    else
+                        last = last + 1
+                        text[last] = "\n"
+                    end
+                end
+            end
+            last_y = ty
+        end
+    end
+
+    return concat(text)
+end
+
 -- document.Catalog.StructTreeRoot.ParentTree.Nums[2][1].A.P[1])
 
 -- helpers
diff --git a/tex/context/base/lpdf-fld.lua b/tex/context/base/lpdf-fld.lua
index 414562ad5..4f15b3c7b 100644
--- a/tex/context/base/lpdf-fld.lua
+++ b/tex/context/base/lpdf-fld.lua
@@ -280,10 +280,8 @@ end
 
 local pdfdocencodingvector, pdfdocencodingcapsule
 
--- The pdf doc encoding vector is needed in order to
--- trigger propper unicode. Interesting is that when
--- a glyph is not in the vector, it is still visible
--- as it is taken from some other font. Messy.
+-- The pdf doc encoding vector is needed in order to trigger proper unicode. Interesting is that when
+-- a glyph is not in the vector, it is still visible as it is taken from some other font. Messy.
 
 -- To be checked: only when text/line fields.
 
diff --git a/tex/context/base/lpdf-ini.lua b/tex/context/base/lpdf-ini.lua
index a4725c30e..76fa5cbb2 100644
--- a/tex/context/base/lpdf-ini.lua
+++ b/tex/context/base/lpdf-ini.lua
@@ -6,6 +6,8 @@ if not modules then modules = { } end modules ['lpdf-ini'] = {
     license   = "see context related readme files"
 }
 
+-- beware of "too many locals" here
+
 local setmetatable, getmetatable, type, next, tostring, tonumber, rawset = setmetatable, getmetatable, type, next, tostring, tonumber, rawset
 local char, byte, format, gsub, concat, match, sub, gmatch = string.char, string.byte, string.format, string.gsub, table.concat, string.match, string.sub, string.gmatch
 local utfchar, utfbyte, utfvalues = utf.char, utf.byte, utf.values
@@ -18,6 +20,10 @@ local report_objects    = logs.reporter("backend","objects")
 local report_finalizing = logs.reporter("backend","finalizing")
 local report_blocked    = logs.reporter("backend","blocked")
 
+-- In ConTeXt MkIV we use utf8 exclusively so all strings get mapped onto a hex
+-- encoded utf16 string type between <>. We could probably save some bytes by using
+-- strings between () but then we end up with escaped ()\ too.
+
 -- gethpos              : used
 -- getpos               : used
 -- getvpos              : used
@@ -227,55 +233,78 @@ local cache = table.setmetatableindex(function(t,k) -- can be made weak
     return v
 end)
 
-local p = Cs(Cc("<feff") * (lpeg.patterns.utf8character/cache)^1 * Cc(">"))
+local escaped = Cs(Cc("(") * (S("\\()")/"\\%0" + P(1))^0 * Cc(")"))
+local unified = Cs(Cc("<feff") * (lpeg.patterns.utf8character/cache)^1 * Cc(">"))
 
 local function tosixteen(str) -- an lpeg might be faster (no table)
     if not str or str == "" then
         return "<feff>" -- not () as we want an indication that it's unicode
     else
-        return lpegmatch(p,str)
+        return lpegmatch(unified,str)
     end
 end
 
-lpdf.tosixteen = tosixteen
+local more = 0
+
+local pattern = C(4) / function(s) -- handles one utf16be unit given as four hex digits
+    local now = tonumber(s,16)
+    if more > 0 then
+        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- combine pending high surrogate with this low one
+        more = 0
+        return utfchar(now)
+    elseif now >= 0xD800 and now <= 0xDBFF then
+        more = now -- high surrogate: remember it and wait for the low one
+        return "" -- a capture without value would leave the four hex digits in the Cs result
+    else
+        return utfchar(now)
+    end
+end
 
--- lpeg is some 5 times faster than gsub (in test) on escaping
+local pattern = P(true) / function() more = 0 end * Cs(pattern^0)
 
--- local escapes = {
---     ["\\"] = "\\\\",
---     ["/"] = "\\/", ["#"] = "\\#",
---     ["<"] = "\\<", [">"] = "\\>",
---     ["["] = "\\[", ["]"] = "\\]",
---     ["("] = "\\(", [")"] = "\\)",
--- }
---
--- local escaped = Cs(Cc("(") * (S("\\/#<>[]()")/escapes + P(1))^0 * Cc(")"))
---
--- local function toeight(str)
---     if not str or str == "" then
---         return "()"
---     else
---         return lpegmatch(escaped,str)
---     end
--- end
---
--- -- no need for escaping .. just use unicode instead
+local function fromsixteen(str)
+    if not str or str == "" then
+        return ""
+    else
+        return lpegmatch(pattern,str)
+    end
+end
 
--- \0 \t \n \r \f <space> ( ) [ ] { } / %
+local toregime   = regimes.toregime
+local fromregime = regimes.fromregime
 
-local function toeight(str)
-    return "(" .. str .. ")"
+local function topdfdoc(str,default)
+    if not str or str == "" then
+        return ""
+    else
+        return lpegmatch(escaped,toregime("pdfdoc",str,default)) -- could be combined if needed
+    end
 end
 
-lpdf.toeight = toeight
+local function frompdfdoc(str)
+    if not str or str == "" then
+        return ""
+    else
+        return fromregime("pdfdoc",str)
+    end
+end
 
--- local escaped = lpeg.Cs((lpeg.S("\0\t\n\r\f ()[]{}/%")/function(s) return format("#%02X",byte(s)) end + lpeg.P(1))^0)
---
--- local function cleaned(str)
---     return (str and str ~= "" and lpegmatch(escaped,str)) or ""
--- end
---
--- lpdf.cleaned = cleaned -- not public yet
+if not toregime   then topdfdoc   = function(s) return s end end
+if not fromregime then frompdfdoc = function(s) return s end end
+
+local function toeight(str)
+    if not str or str == "" then
+        return "()"
+    else
+        return lpegmatch(escaped,str)
+    end
+end
+
+lpdf.tosixteen   = tosixteen
+lpdf.toeight     = toeight
+lpdf.topdfdoc    = topdfdoc
+lpdf.fromsixteen = fromsixteen
+lpdf.frompdfdoc  = frompdfdoc
 
 local function merge_t(a,b)
     local t = { }
@@ -310,8 +339,8 @@ tostring_d = function(t,contentonly,key)
                 r[rn] = f_key_value(k,toeight(v))
             elseif tv == "number" then
                 r[rn] = f_key_number(k,v)
-            elseif tv == "unicode" then
-                r[rn] = f_key_value(k,tosixteen(v))
+         -- elseif tv == "unicode" then -- can't happen
+         --     r[rn] = f_key_value(k,tosixteen(v))
             elseif tv == "table" then
                 local mv = getmetatable(v)
                 if mv and mv.__lpdftype then
@@ -350,8 +379,8 @@ tostring_a = function(t,contentonly,key)
                 r[k] = toeight(v)
             elseif tv == "number" then
                 r[k] = f_tonumber(v)
-            elseif tv == "unicode" then
-                r[k] = tosixteen(v)
+         -- elseif tv == "unicode" then
+         --     r[k] = tosixteen(v)
             elseif tv == "table" then
                 local mv = getmetatable(v)
                 local mt = mv and mv.__lpdftype
@@ -380,15 +409,16 @@ tostring_a = function(t,contentonly,key)
     end
 end
 
-local tostring_x = function(t) return concat(t," ")    end
-local tostring_s = function(t) return toeight(t[1])    end
-local tostring_u = function(t) return tosixteen(t[1])  end
-local tostring_n = function(t) return tostring(t[1])   end -- tostring not needed
-local tostring_n = function(t) return f_tonumber(t[1]) end -- tostring not needed
-local tostring_c = function(t) return t[1]             end -- already prefixed (hashed)
-local tostring_z = function()  return "null"           end
-local tostring_t = function()  return "true"           end
-local tostring_f = function()  return "false"          end
+local tostring_x = function(t) return concat(t," ")       end
+local tostring_s = function(t) return toeight(t[1])       end
+local tostring_p = function(t) return topdfdoc(t[1],t[2]) end
+local tostring_u = function(t) return tosixteen(t[1])     end
+local tostring_n = function(t) return tostring(t[1])      end -- tostring not needed
+local tostring_n = function(t) return f_tonumber(t[1])    end -- tostring not needed
+local tostring_c = function(t) return t[1]                end -- already prefixed (hashed)
+local tostring_z = function()  return "null"              end
+local tostring_t = function()  return "true"              end
+local tostring_f = function()  return "false"             end
 local tostring_r = function(t) local n = t[1] return n and n > 0 and (n .. " 0 R") or "NULL" end
 
 local tostring_v = function(t)
@@ -400,18 +430,19 @@ local tostring_v = function(t)
     end
 end
 
-local function value_x(t)     return t                  end -- the call is experimental
-local function value_s(t,key) return t[1]               end -- the call is experimental
-local function value_u(t,key) return t[1]               end -- the call is experimental
-local function value_n(t,key) return t[1]               end -- the call is experimental
-local function value_c(t)     return sub(t[1],2)        end -- the call is experimental
-local function value_d(t)     return tostring_d(t,true) end -- the call is experimental
-local function value_a(t)     return tostring_a(t,true) end -- the call is experimental
-local function value_z()      return nil                end -- the call is experimental
-local function value_t(t)     return t.value or true    end -- the call is experimental
-local function value_f(t)     return t.value or false   end -- the call is experimental
-local function value_r()      return t[1] or 0          end -- the call is experimental -- NULL
-local function value_v()      return t[1]               end -- the call is experimental
+local function value_x(t) return t                  end
+local function value_s(t) return t[1]               end
+local function value_p(t) return t[1]               end
+local function value_u(t) return t[1]               end
+local function value_n(t) return t[1]               end
+local function value_c(t) return sub(t[1],2)        end
+local function value_d(t) return tostring_d(t,true) end
+local function value_a(t) return tostring_a(t,true) end
+local function value_z()  return nil                end
+local function value_t(t) return t.value or true    end
+local function value_f(t) return t.value or false   end
+local function value_r(t) return t[1] or 0          end -- NULL (t was an undeclared global before)
+local function value_v(t) return t[1]               end -- t was an undeclared global before
 
 local function add_x(t,k,v) rawset(t,k,tostring(v)) end
 
@@ -420,6 +451,7 @@ local mt_d = { __lpdftype = "dictionary", __tostring = tostring_d, __call = valu
 local mt_a = { __lpdftype = "array",      __tostring = tostring_a, __call = value_a }
 local mt_u = { __lpdftype = "unicode",    __tostring = tostring_u, __call = value_u }
 local mt_s = { __lpdftype = "string",     __tostring = tostring_s, __call = value_s }
+local mt_p = { __lpdftype = "docstring",  __tostring = tostring_p, __call = value_p }
 local mt_n = { __lpdftype = "number",     __tostring = tostring_n, __call = value_n }
 local mt_c = { __lpdftype = "constant",   __tostring = tostring_c, __call = value_c }
 local mt_z = { __lpdftype = "null",       __tostring = tostring_z, __call = value_z }
@@ -453,6 +485,10 @@ local function pdfstring(str,default)
     return setmetatable({ str or default or "" },mt_s)
 end
 
+local function pdfdocstring(str,default,defaultchar)
+    return setmetatable({ str or default or "", defaultchar or " " },mt_p)
+end
+
 local function pdfunicode(str,default)
     return setmetatable({ str or default or "" },mt_u) -- could be a string
 end
@@ -538,6 +574,7 @@ end
 lpdf.stream      = pdfstream -- THIS WILL PROBABLY CHANGE
 lpdf.dictionary  = pdfdictionary
 lpdf.array       = pdfarray
+lpdf.docstring   = pdfdocstring
 lpdf.string      = pdfstring
 lpdf.unicode     = pdfunicode
 lpdf.number      = pdfnumber
@@ -800,145 +837,147 @@ end
 
 callbacks.register("finish_pdffile", lpdf.finalizedocument)
 
--- some minimal tracing, handy for checking the order
 
-local function trace_set(what,key)
-    if trace_resources then
-        report_finalizing("setting key %a in %a",key,what)
+do
+
+    -- some minimal tracing, handy for checking the order
+
+    local function trace_set(what,key)
+        if trace_resources then
+            report_finalizing("setting key %a in %a",key,what)
+        end
     end
-end
-local function trace_flush(what)
-    if trace_resources then
-        report_finalizing("flushing %a",what)
+
+    local function trace_flush(what)
+        if trace_resources then
+            report_finalizing("flushing %a",what)
+        end
     end
-end
 
-lpdf.protectresources = true
+    lpdf.protectresources = true
 
-local catalog = pdfdictionary { Type = pdfconstant("Catalog") } -- nicer, but when we assign we nil the Type
-local info    = pdfdictionary { Type = pdfconstant("Info")    } -- nicer, but when we assign we nil the Type
------ names   = pdfdictionary { Type = pdfconstant("Names")   } -- nicer, but when we assign we nil the Type
+    local catalog = pdfdictionary { Type = pdfconstant("Catalog") } -- nicer, but when we assign we nil the Type
+    local info    = pdfdictionary { Type = pdfconstant("Info")    } -- nicer, but when we assign we nil the Type
+    ----- names   = pdfdictionary { Type = pdfconstant("Names")   } -- nicer, but when we assign we nil the Type
 
-local function flushcatalog()
-    if not environment.initex then
-        trace_flush("catalog")
-        catalog.Type = nil
-        pdfsetcatalog(catalog())
+    local function flushcatalog()
+        if not environment.initex then
+            trace_flush("catalog")
+            catalog.Type = nil
+            pdfsetcatalog(catalog())
+        end
     end
-end
 
-local function flushinfo()
-    if not environment.initex then
-        trace_flush("info")
-        info.Type = nil
-        pdfsetinfo(info())
+    local function flushinfo()
+        if not environment.initex then
+            trace_flush("info")
+            info.Type = nil
+            pdfsetinfo(info())
+        end
     end
-end
-
--- local function flushnames()
---     if not environment.initex then
---         trace_flush("names")
---         names.Type = nil
---         pdfsetnames(names())
---     end
--- end
 
-function lpdf.addtocatalog(k,v)
-    if not (lpdf.protectresources and catalog[k]) then
-        trace_set("catalog",k)
-        catalog[k] = v
+    -- local function flushnames()
+    --     if not environment.initex then
+    --         trace_flush("names")
+    --         names.Type = nil
+    --         pdfsetnames(names())
+    --     end
+    -- end
+
+    function lpdf.addtocatalog(k,v)
+        if not (lpdf.protectresources and catalog[k]) then
+            trace_set("catalog",k)
+            catalog[k] = v
+        end
     end
-end
 
-function lpdf.addtoinfo(k,v)
-    if not (lpdf.protectresources and info[k]) then
-        trace_set("info",k)
-        info[k] = v
+    function lpdf.addtoinfo(k,v)
+        if not (lpdf.protectresources and info[k]) then
+            trace_set("info",k)
+            info[k] = v
+        end
     end
-end
 
--- local function lpdf.addtonames(k,v)
---     if not (lpdf.protectresources and names[k]) then
---         trace_set("names",k)
---         names[k] = v
---     end
--- end
+    -- local function lpdf.addtonames(k,v)
+    --     if not (lpdf.protectresources and names[k]) then
+    --         trace_set("names",k)
+    --         names[k] = v
+    --     end
+    -- end
 
-local names = pdfdictionary {
- -- Type = pdfconstant("Names")
-}
+    local names = pdfdictionary {
+     -- Type = pdfconstant("Names")
+    }
 
-local function flushnames()
-    if next(names) and not environment.initex then
-        names.Type = pdfconstant("Names")
-        trace_flush("names")
-        lpdf.addtocatalog("Names",pdfreference(pdfimmediateobject(tostring(names))))
+    local function flushnames()
+        if next(names) and not environment.initex then
+            names.Type = pdfconstant("Names")
+            trace_flush("names")
+            lpdf.addtocatalog("Names",pdfreference(pdfimmediateobject(tostring(names))))
+        end
     end
-end
 
-function lpdf.addtonames(k,v)
-    if not (lpdf.protectresources and names  [k]) then
-        trace_set("names",  k)
-        names  [k] = v
+    function lpdf.addtonames(k,v)
+        if not (lpdf.protectresources and names[k]) then
+            trace_set("names",  k)
+            names  [k] = v
+        end
     end
-end
 
-local dummy = pdfreserveobject() -- else bug in hvmd due so some internal luatex conflict
-
--- Some day I will implement a proper minimalized resource management.
-
-local r_extgstates,  d_extgstates  = pdfreserveobject(), pdfdictionary()  local p_extgstates  = pdfreference(r_extgstates)
-local r_colorspaces, d_colorspaces = pdfreserveobject(), pdfdictionary()  local p_colorspaces = pdfreference(r_colorspaces)
-local r_patterns,    d_patterns    = pdfreserveobject(), pdfdictionary()  local p_patterns    = pdfreference(r_patterns)
-local r_shades,      d_shades      = pdfreserveobject(), pdfdictionary()  local p_shades      = pdfreference(r_shades)
-
-local function checkextgstates () if next(d_extgstates ) then addtopageresources("ExtGState", p_extgstates ) end end
-local function checkcolorspaces() if next(d_colorspaces) then addtopageresources("ColorSpace",p_colorspaces) end end
-local function checkpatterns   () if next(d_patterns   ) then addtopageresources("Pattern",   p_patterns   ) end end
-local function checkshades     () if next(d_shades     ) then addtopageresources("Shading",   p_shades     ) end end
-
-local function flushextgstates () if next(d_extgstates ) then trace_flush("extgstates")  pdfimmediateobject(r_extgstates, tostring(d_extgstates )) end end
-local function flushcolorspaces() if next(d_colorspaces) then trace_flush("colorspaces") pdfimmediateobject(r_colorspaces,tostring(d_colorspaces)) end end
-local function flushpatterns   () if next(d_patterns   ) then trace_flush("patterns")    pdfimmediateobject(r_patterns,   tostring(d_patterns   )) end end
-local function flushshades     () if next(d_shades     ) then trace_flush("shades")      pdfimmediateobject(r_shades,     tostring(d_shades     )) end end
-
-function lpdf.collectedresources()
-    local ExtGState  = next(d_extgstates ) and p_extgstates
-    local ColorSpace = next(d_colorspaces) and p_colorspaces
-    local Pattern    = next(d_patterns   ) and p_patterns
-    local Shading    = next(d_shades     ) and p_shades
-    if ExtGState or ColorSpace or Pattern or Shading then
-        local collected = pdfdictionary {
-            ExtGState  = ExtGState,
-            ColorSpace = ColorSpace,
-            Pattern    = Pattern,
-            Shading    = Shading,
-         -- ProcSet    = pdfarray { pdfconstant("PDF") },
-        }
-        return collected()
-    else
-        return ""
+    local r_extgstates,  d_extgstates  = pdfreserveobject(), pdfdictionary()  local p_extgstates  = pdfreference(r_extgstates)
+    local r_colorspaces, d_colorspaces = pdfreserveobject(), pdfdictionary()  local p_colorspaces = pdfreference(r_colorspaces)
+    local r_patterns,    d_patterns    = pdfreserveobject(), pdfdictionary()  local p_patterns    = pdfreference(r_patterns)
+    local r_shades,      d_shades      = pdfreserveobject(), pdfdictionary()  local p_shades      = pdfreference(r_shades)
+
+    local function checkextgstates () if next(d_extgstates ) then addtopageresources("ExtGState", p_extgstates ) end end
+    local function checkcolorspaces() if next(d_colorspaces) then addtopageresources("ColorSpace",p_colorspaces) end end
+    local function checkpatterns   () if next(d_patterns   ) then addtopageresources("Pattern",   p_patterns   ) end end
+    local function checkshades     () if next(d_shades     ) then addtopageresources("Shading",   p_shades     ) end end
+
+    local function flushextgstates () if next(d_extgstates ) then trace_flush("extgstates")  pdfimmediateobject(r_extgstates, tostring(d_extgstates )) end end
+    local function flushcolorspaces() if next(d_colorspaces) then trace_flush("colorspaces") pdfimmediateobject(r_colorspaces,tostring(d_colorspaces)) end end
+    local function flushpatterns   () if next(d_patterns   ) then trace_flush("patterns")    pdfimmediateobject(r_patterns,   tostring(d_patterns   )) end end
+    local function flushshades     () if next(d_shades     ) then trace_flush("shades")      pdfimmediateobject(r_shades,     tostring(d_shades     )) end end
+
+    function lpdf.collectedresources()
+        local ExtGState  = next(d_extgstates ) and p_extgstates
+        local ColorSpace = next(d_colorspaces) and p_colorspaces
+        local Pattern    = next(d_patterns   ) and p_patterns
+        local Shading    = next(d_shades     ) and p_shades
+        if ExtGState or ColorSpace or Pattern or Shading then
+            local collected = pdfdictionary {
+                ExtGState  = ExtGState,
+                ColorSpace = ColorSpace,
+                Pattern    = Pattern,
+                Shading    = Shading,
+             -- ProcSet    = pdfarray { pdfconstant("PDF") },
+            }
+            return collected()
+        else
+            return ""
+        end
     end
-end
 
-function lpdf.adddocumentextgstate (k,v) d_extgstates [k] = v end
-function lpdf.adddocumentcolorspace(k,v) d_colorspaces[k] = v end
-function lpdf.adddocumentpattern   (k,v) d_patterns   [k] = v end
-function lpdf.adddocumentshade     (k,v) d_shades     [k] = v end
+    function lpdf.adddocumentextgstate (k,v) d_extgstates [k] = v end
+    function lpdf.adddocumentcolorspace(k,v) d_colorspaces[k] = v end
+    function lpdf.adddocumentpattern   (k,v) d_patterns   [k] = v end
+    function lpdf.adddocumentshade     (k,v) d_shades     [k] = v end
+
+    registerdocumentfinalizer(flushextgstates,3,"extended graphic states")
+    registerdocumentfinalizer(flushcolorspaces,3,"color spaces")
+    registerdocumentfinalizer(flushpatterns,3,"patterns")
+    registerdocumentfinalizer(flushshades,3,"shades")
 
-registerdocumentfinalizer(flushextgstates,3,"extended graphic states")
-registerdocumentfinalizer(flushcolorspaces,3,"color spaces")
-registerdocumentfinalizer(flushpatterns,3,"patterns")
-registerdocumentfinalizer(flushshades,3,"shades")
+    registerdocumentfinalizer(flushnames,3,"names") -- before catalog
+    registerdocumentfinalizer(flushcatalog,3,"catalog")
+    registerdocumentfinalizer(flushinfo,3,"info")
 
-registerdocumentfinalizer(flushnames,3,"names") -- before catalog
-registerdocumentfinalizer(flushcatalog,3,"catalog")
-registerdocumentfinalizer(flushinfo,3,"info")
+    registerpagefinalizer(checkextgstates,3,"extended graphic states")
+    registerpagefinalizer(checkcolorspaces,3,"color spaces")
+    registerpagefinalizer(checkpatterns,3,"patterns")
+    registerpagefinalizer(checkshades,3,"shades")
 
-registerpagefinalizer(checkextgstates,3,"extended graphic states")
-registerpagefinalizer(checkcolorspaces,3,"color spaces")
-registerpagefinalizer(checkpatterns,3,"patterns")
-registerpagefinalizer(checkshades,3,"shades")
+end
 
 -- in strc-bkm: lpdf.registerdocumentfinalizer(function() structures.bookmarks.place() end,1)
 
@@ -949,19 +988,23 @@ end
 
 -- ! -> universaltime
 
-local timestamp = os.date("%Y-%m-%dT%X") .. os.timezone(true)
+do
 
-function lpdf.timestamp()
-    return timestamp
-end
+    local timestamp = os.date("%Y-%m-%dT%X") .. os.timezone(true)
 
-function lpdf.pdftimestamp(str)
-    local Y, M, D, h, m, s, Zs, Zh, Zm = match(str,"^(%d%d%d%d)%-(%d%d)%-(%d%d)T(%d%d):(%d%d):(%d%d)([%+%-])(%d%d):(%d%d)$")
-    return Y and format("D:%s%s%s%s%s%s%s%s'%s'",Y,M,D,h,m,s,Zs,Zh,Zm)
-end
+    function lpdf.timestamp() -- hands back the timestamp string built once in the enclosing do-block
+        return timestamp
+    end
+
+    function lpdf.pdftimestamp(str) -- "YYYY-MM-DDThh:mm:ss+hh:mm" (zone sign + or -) -> "D:YYYYMMDDhhmmss+hh'mm'"; nil when str has another shape
+        local Y, M, D, h, m, s, Zs, Zh, Zm = match(str,"^(%d%d%d%d)%-(%d%d)%-(%d%d)T(%d%d):(%d%d):(%d%d)([%+%-])(%d%d):(%d%d)$")
+        return Y and format("D:%s%s%s%s%s%s%s%s'%s'",Y,M,D,h,m,s,Zs,Zh,Zm)
+    end
+
+    function lpdf.id() -- "<tex.jobname>.<timestamp>", built from the same timestamp string that lpdf.timestamp returns
+        return format("%s.%s",tex.jobname,timestamp)
+    end
 
-function lpdf.id()
-    return format("%s.%s",tex.jobname,timestamp)
 end
 
 -- return nil is nicer in test prints
@@ -1104,25 +1147,29 @@ end
 
 -- return formatters["BT /Span << /ActualText (CONTEXT) >> BDC [<feff>] TJ % t EMC ET"](code)
 
-local f_actual_text_one = formatters["BT /Span << /ActualText <feff%04x> >> BDC [<feff>] TJ %s EMC ET"]
-local f_actual_text_two = formatters["BT /Span << /ActualText <feff%04x%04x> >> BDC [<feff>] TJ %s EMC ET"]
-local f_actual_text     = formatters["/Span <</ActualText %s >> BDC"]
+do
 
-local context           = context
-local pdfdirect         = nodes.pool.pdfdirect
+    local f_actual_text_one = formatters["BT /Span << /ActualText <feff%04x> >> BDC [<feff>] TJ %s EMC ET"]
+    local f_actual_text_two = formatters["BT /Span << /ActualText <feff%04x%04x> >> BDC [<feff>] TJ %s EMC ET"]
+    local f_actual_text     = formatters["/Span <</ActualText %s >> BDC"]
 
-function codeinjections.unicodetoactualtext(unicode,pdfcode)
-    if unicode < 0x10000 then
-        return f_actual_text_one(unicode,pdfcode)
-    else
-        return f_actual_text_two(unicode/1024+0xD800,unicode%1024+0xDC00,pdfcode)
+    local context           = context
+    local pdfdirect         = nodes.pool.pdfdirect
+
+    function codeinjections.unicodetoactualtext(unicode,pdfcode) -- returns pdfcode wrapped in a /Span whose /ActualText is the UTF-16 hex (with feff prefix) of the code point
+        if unicode < 0x10000 then
+            return f_actual_text_one(unicode,pdfcode)
+        else -- supplementary plane: needs a surrogate pair, computed after removing the 0x10000 offset (high part floored to an integer)
+            return f_actual_text_two(math.floor((unicode-0x10000)/1024)+0xD800,unicode%1024+0xDC00,pdfcode)
+        end
     end
-end
 
-function commands.startactualtext(str)
-    context(pdfdirect(f_actual_text(tosixteen(str))))
-end
+    function commands.startactualtext(str) -- pipes a "/Span <</ActualText ... >> BDC" pdfdirect node to context; str goes through tosixteen first (presumably UTF-16 hex, confirm at its definition)
+        context(pdfdirect(f_actual_text(tosixteen(str))))
+    end
+
+    function commands.stopactualtext() -- pipes the matching "EMC" operator, closing the span opened by commands.startactualtext
+        context(pdfdirect("EMC"))
+    end
 
-function commands.stopactualtext()
-    context(pdfdirect("EMC"))
 end
diff --git a/tex/context/base/regi-ini.lua b/tex/context/base/regi-ini.lua
index 9484db7c7..c0a23cf42 100644
--- a/tex/context/base/regi-ini.lua
+++ b/tex/context/base/regi-ini.lua
@@ -15,7 +15,7 @@ runtime.</p>
 local commands, context = commands, context
 
 local utfchar = utf.char
-local P, Cs, lpegmatch = lpeg.P, lpeg.Cs, lpeg.match
+local P, Cs, Cc, lpegmatch = lpeg.P, lpeg.Cs, lpeg.Cc, lpeg.match
 local char, gsub, format, gmatch, byte, match = string.char, string.gsub, string.format, string.gmatch, string.byte, string.match
 local next = next
 local insert, remove, fastcopy = table.insert, table.remove, table.fastcopy
@@ -99,6 +99,8 @@ local synonyms = { -- backward compatibility list
 
     ["windows"]      = "cp1252",
 
+    ["pdf"]          = "pdfdoc",
+
 }
 
 local currentregime = "utf"
@@ -132,7 +134,7 @@ end
 setmetatableindex(mapping,    loadregime)
 setmetatableindex(backmapping,loadreverse)
 
-local function translate(line,regime)
+local function fromregime(regime,line)
     if line and #line > 0 then
         local map = mapping[regime and synonyms[regime] or regime or currentregime]
         if map then
@@ -178,12 +180,15 @@ local function toregime(vector,str,default) -- toregime('8859-1',"abcde Ä","?")
     local r = c[d]
     if not r then
         local t = fastcopy(backmapping[vector])
-        setmetatableindex(t, function(t,k)
-            local v = d
-            t[k] = v
-            return v
-        end)
-        r = utf.remapper(t)
+     -- r = utf.remapper(t) -- not good for defaults here
+        local pattern = Cs((lpeg.utfchartabletopattern(t)/t + lpeg.patterns.utf8character/d + P(1)/d)^0)
+        r = function(str)
+            if not str or str == "" then
+                return ""
+            else
+                return lpegmatch(pattern,str)
+            end
+        end
         c[d]  = r
     end
     return r(str)
@@ -204,10 +209,11 @@ local function enable(regime)
     end
 end
 
-regimes.toregime  = toregime
-regimes.translate = translate
-regimes.enable    = enable
-regimes.disable   = disable
+regimes.toregime   = toregime
+regimes.fromregime = fromregime
+regimes.translate  = function(str,regime) return fromregime(regime,str) end -- old (str,regime) argument order, forwarded to fromregime(regime,str)
+regimes.enable     = enable
+regimes.disable    = disable
 
 -- The following function can be used when we want to make sure that
 -- utf gets passed unharmed. This is needed for modules.
@@ -216,7 +222,7 @@ local level = 0
 
 function regimes.process(str,filename,currentline,noflines,coding)
     if level == 0 and coding ~= "utf-8" then
-        str = translate(str,currentregime)
+        str = fromregime(currentregime,str)
         if trace_translating then
             report_translating("utf: %s",str)
         end
@@ -403,5 +409,5 @@ end
 -- local new = regimes.cleanup("cp1252",old)
 -- report_translating("%s -> %s",old,new)
 -- local old = "Pozn" .. char(0xE1) .. "mky"
--- local new = translate(old,"cp1250")
+-- local new = fromregime("cp1250",old)
 -- report_translating("%s -> %s",old,new)
diff --git a/tex/context/base/regi-pdfdoc.lua b/tex/context/base/regi-pdfdoc.lua
new file mode 100644
index 000000000..363d3ae0d
--- /dev/null
+++ b/tex/context/base/regi-pdfdoc.lua
@@ -0,0 +1,26 @@
+if not modules then modules = { } end modules ['regi-pdfdoc'] = {
+    version   = 1.001,
+    comment   = "companion to regi-ini.mkiv",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files"
+}
+
+return { [0] = -- vector indexed by byte value 0..255 (PDFDocEncoding); index 0 must hold the code point for byte 0x00
+    0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
+    0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x02D8, 0x02C7, 0x02C6, 0x02D9, 0x02DD, 0x02DB, 0x02DA, 0x02DC,
+    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+    0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+    0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
+    0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044, 0x2039, 0x203A, 0x2212, 0x2030, 0x201E, 0x201C, 0x201D, 0x2018,
+    0x2019, 0x201A, 0x2122, 0xFB01, 0xFB02, 0x0141, 0x0152, 0x0160, 0x0178, 0x017D, 0x0131, 0x0142, 0x0153, 0x0161, 0x017E, 0x009F,
+    0x20AC, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0xFFFD, 0x00AE, 0x00AF,
+    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
+    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+    0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
+    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+    0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF,
+}
diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf
index 51d01e48d..7fb1ecac9 100644
--- a/tex/context/base/status-files.pdf
+++ b/tex/context/base/status-files.pdf
diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf
index f4d5e92d4..a9ca8b459 100644
--- a/tex/context/base/status-lua.pdf
+++ b/tex/context/base/status-lua.pdf
diff --git a/tex/context/base/strc-bkm.mkiv b/tex/context/base/strc-bkm.mkiv
index 9d2ebd796..5f1acb686 100644
--- a/tex/context/base/strc-bkm.mkiv
+++ b/tex/context/base/strc-bkm.mkiv
@@ -127,6 +127,38 @@
     }}%
 \to \everysetupbookmark
 
+%D There is a plugin mechanism but this is for experts only. The intermediate
+%D data structures are stable.
+%D
+%D \starttyping
+%D \startluacode
+%D     structures.bookmarks.installhandler("check before","before",function(levels)
+%D         logs.report("extra bookmarks","before (normal bookmarks)")
+%D         inspect(levels)
+%D         logs.report("extra bookmarks","before (extra bookmarks)")
+%D         inspect(structures.bookmarks.extras.get())
+%D         return levels
+%D     end)
+%D     structures.bookmarks.installhandler("check after", "after", function(levels)
+%D         logs.report("extra bookmarks","after (merged bookmarks)")
+%D         inspect(levels)
+%D         return levels
+%D     end)
+%D \stopluacode
+%D \stoptyping
+%D
+%D This mechanism was added when bookmark inclusion became (optional) part of graphic
+%D inclusion (which is needed by Taco).
+%D
+%D \starttyping
+%D \getfiguredimensions[somefile.pdf]
+%D \dorecurse {\noffigurepages} {
+%D     \startTEXpage
+%D         \externalfigure[somefile.pdf][interaction=bookmark,page=\recurselevel]
+%D     \stopTEXpage
+%D }
+%D \stoptyping
+
 \protect \endinput
 
 % \starttext
diff --git a/tex/context/base/supp-box.lua b/tex/context/base/supp-box.lua
index 3c5a3383d..c69486306 100644
--- a/tex/context/base/supp-box.lua
+++ b/tex/context/base/supp-box.lua
@@ -42,9 +42,11 @@ local setfield     = nuts.setfield
 local setbox       = nuts.setbox
 
 local free_node    = nuts.free
-local copy_list    = nuts.copy_list
+local flush_list   = nuts.flush_list
 local copy_node    = nuts.copy
+local copy_list    = nuts.copy_list
 local find_tail    = nuts.tail
+local traverse_id  = nuts.traverse_id
 
 local listtoutf    = nodes.listtoutf
 
@@ -84,6 +86,19 @@ end
 
 commands.hyphenatedlist = hyphenatedlist
 
+-- local function hyphenatedhack(head,pre)
+--     pre = tonut(pre)
+--     for n in traverse_id(disc_code,tonut(head)) do
+--         local hyphen = getfield(n,"pre")
+--         if hyphen then
+--             flush_list(hyphen)
+--         end
+--         setfield(n,"pre",copy_list(pre))
+--     end
+-- end
+--
+-- commands.hyphenatedhack = hyphenatedhack
+
 function commands.showhyphenatedinlist(list)
     report_hyphenation("show: %s",listtoutf(tonut(list),false,true))
 end
diff --git a/tex/context/base/supp-box.mkiv b/tex/context/base/supp-box.mkiv
index 66f373b72..bc1e30749 100644
--- a/tex/context/base/supp-box.mkiv
+++ b/tex/context/base/supp-box.mkiv
@@ -1063,7 +1063,7 @@
 %D \showhyphens{dohyphenatedword}
 %D \stoptyping
 
-\def\doshowhyphenatednextbox
+\unexpanded\def\doshowhyphenatednextbox
   {\ctxcommand{showhyphenatedinlist(tex.box[\number\nextbox].list)}}
 
 \unexpanded\def\showhyphens{\dowithnextboxcs\doshowhyphenatednextbox\hbox}
@@ -1076,7 +1076,7 @@
 %D \hyphenatedfile{tufte}
 %D \stoptyping
 
-\def\dohyphenatednextbox
+\unexpanded\def\dohyphenatednextbox
   {\ctxcommand{hyphenatedlist(tex.box[\number\nextbox].list)}%
    \unhbox\nextbox}
 
@@ -1084,6 +1084,20 @@
 \unexpanded\def\hyphenatedpar   {\dowithnextboxcs\dohyphenatednextbox\hbox}
 \unexpanded\def\hyphenatedfile#1{\dowithnextboxcs\dohyphenatednextbox\hbox{\readfile{#1}\donothing\donothing}}
 
+% D \starttyping
+% D \hyphenatedhack{\kern-.25em_}{alongword}
+% D \stoptyping
+%
+% \unexpanded\def\dohyphenatedhackbox
+%   {\ctxcommand{hyphenatedhack(tex.box[\number\nextbox].list,tex.box[\number\scratchbox].list)}%
+%    \unhbox\nextbox
+%    \endgroup}
+%
+% \unexpanded\def\hyphenatedhack#1% the result of a test, not that useful
+%   {\begingroup
+%    \setbox\scratchbox\hbox{#1}% only chars and kerns !
+%    \dowithnextboxcs\dohyphenatedhackbox\hbox}
+
 %D \macros
 %D   {processtokens}
 %D
diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua
index 15241bacc..efbac3f25 100644
--- a/tex/generic/context/luatex/luatex-fonts-merged.lua
+++ b/tex/generic/context/luatex/luatex-fonts-merged.lua
@@ -1,6 +1,6 @@
 -- merged file : luatex-fonts-merged.lua
 -- parent file : luatex-fonts.lua
--- merge date  : 10/03/14 19:27:20
+-- merge date  : 10/06/14 00:29:22
 
 do -- begin closure to overcome local limits and interference
 
@@ -149,6 +149,8 @@ patterns.utfbom_16_le=utfbom_16_le
 patterns.utfbom_8=utfbom_8
 patterns.utf_16_be_nl=P("\000\r\000\n")+P("\000\r")+P("\000\n") 
 patterns.utf_16_le_nl=P("\r\000\n\000")+P("\r\000")+P("\n\000") 
+patterns.utf_32_be_nl=P("\000\000\000\r\000\000\000\n")+P("\000\000\000\r")+P("\000\000\000\n")
+patterns.utf_32_le_nl=P("\r\000\000\000\n\000\000\000")+P("\r\000\000\000")+P("\n\000\000\000")
 patterns.utf8one=R("\000\127")
 patterns.utf8two=R("\194\223")*utf8next
 patterns.utf8three=R("\224\239")*utf8next*utf8next
@@ -5151,11 +5153,12 @@ if not modules then modules={} end modules ['font-map']={
   copyright="PRAGMA ADE / ConTeXt Development Team",
   license="see context related readme files"
 }
-local tonumber=tonumber
+local tonumber,next,type=tonumber,next,type
 local match,format,find,concat,gsub,lower=string.match,string.format,string.find,table.concat,string.gsub,string.lower
 local P,R,S,C,Ct,Cc,lpegmatch=lpeg.P,lpeg.R,lpeg.S,lpeg.C,lpeg.Ct,lpeg.Cc,lpeg.match
 local utfbyte=utf.byte
 local floor=math.floor
+local formatters=string.formatters
 local trace_loading=false trackers.register("fonts.loading",function(v) trace_loading=v end)
 local trace_mapping=false trackers.register("fonts.mapping",function(v) trace_unimapping=v end)
 local report_fonts=logs.reporter("fonts","loading") 
@@ -5195,11 +5198,13 @@ local function makenameparser(str)
     return p
   end
 end
+local f_single=formatters["%04X"]
+local f_double=formatters["%04X%04X"]
 local function tounicode16(unicode,name)
   if unicode<0x10000 then
-    return format("%04X",unicode)
+    return f_single(unicode)
   elseif unicode<0x1FFFFFFFFF then
-    return format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00)
+    return f_double(floor(unicode/1024),unicode%1024+0xDC00)
   else
     report_fonts("can't convert %a in %a into tounicode",unicode,name)
   end
@@ -5209,9 +5214,9 @@ local function tounicode16sequence(unicodes,name)
   for l=1,#unicodes do
     local u=unicodes[l]
     if u<0x10000 then
-      t[l]=format("%04X",u)
+      t[l]=f_single(u)
     elseif unicode<0x1FFFFFFFFF then
-      t[l]=format("%04X%04X",floor(u/1024),u%1024+0xDC00)
+      t[l]=f_double(floor(u/1024),u%1024+0xDC00)
     else
       report_fonts ("can't convert %a in %a into tounicode",u,name)
       return
@@ -5225,9 +5230,9 @@ local function tounicode(unicode,name)
     for l=1,#unicode do
       local u=unicode[l]
       if u<0x10000 then
-        t[l]=format("%04X",u)
+        t[l]=f_single(u)
       elseif u<0x1FFFFFFFFF then
-        t[l]=format("%04X%04X",floor(u/1024),u%1024+0xDC00)
+        t[l]=f_double(floor(u/1024),u%1024+0xDC00)
       else
         report_fonts ("can't convert %a in %a into tounicode",u,name)
         return
@@ -5236,9 +5241,9 @@ local function tounicode(unicode,name)
     return concat(t)
   else
     if unicode<0x10000 then
-      return format("%04X",unicode)
+      return f_single(unicode)
     elseif unicode<0x1FFFFFFFFF then
-      return format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00)
+      return f_double(floor(unicode/1024),unicode%1024+0xDC00)
     else
       report_fonts("can't convert %a in %a into tounicode",unicode,name)
     end
@@ -5261,6 +5266,29 @@ mappings.fromunicode16=fromunicode16
 local ligseparator=P("_")
 local varseparator=P(".")
 local namesplitter=Ct(C((1-ligseparator-varseparator)^1)*(ligseparator*C((1-ligseparator-varseparator)^1))^0)
+local overloads={
+  IJ={ name="I_J",unicode={ 0x49,0x4A },mess=0x0132 },
+  ij={ name="i_j",unicode={ 0x69,0x6A },mess=0x0133 },
+  ff={ name="f_f",unicode={ 0x66,0x66 },mess=0xFB00 },
+  fi={ name="f_i",unicode={ 0x66,0x69 },mess=0xFB01 },
+  fl={ name="f_l",unicode={ 0x66,0x6C },mess=0xFB02 },
+  ffi={ name="f_f_i",unicode={ 0x66,0x66,0x69 },mess=0xFB03 },
+  ffl={ name="f_f_l",unicode={ 0x66,0x66,0x6C },mess=0xFB04 },
+  fj={ name="f_j",unicode={ 0x66,0x6A } },
+  fk={ name="f_k",unicode={ 0x66,0x6B } },
+}
+require("char-ini")
+local aliases={} -- collected first: assigning new keys to overloads while next() walks it is undefined behaviour
+for k,v in next,overloads do
+  local name=v.name
+  local mess=v.mess
+  if name then aliases[name]=v end
+  if mess then aliases[mess]=v end
+end
+for k,v in next,aliases do
+  overloads[k]=v
+end
+mappings.overloads=overloads
 function mappings.addtounicode(data,filename)
   local resources=data.resources
   local properties=data.properties
@@ -5275,7 +5303,6 @@ function mappings.addtounicode(data,filename)
   unicodes['zwj']=unicodes['zwj']  or 0x200D
   unicodes['zwnj']=unicodes['zwnj']  or 0x200C
   local private=fonts.constructors.privateoffset
-  local unknown=format("%04X",utfbyte("?"))
   local unicodevector=fonts.encodings.agl.unicodes
   local missing={}
   local lumunic,uparser,oparser
@@ -5292,7 +5319,10 @@ function mappings.addtounicode(data,filename)
   for unic,glyph in next,descriptions do
     local index=glyph.index
     local name=glyph.name
-    if unic==-1 or unic>=private or (unic>=0xE000 and unic<=0xF8FF) or unic==0xFFFE or unic==0xFFFF then
+    local r=overloads[name]
+    if r then
+      glyph.unicode=r.unicode
+    elseif unic==-1 or unic>=private or (unic>=0xE000 and unic<=0xF8FF) or unic==0xFFFE or unic==0xFFFF then
       local unicode=lumunic and lumunic[name] or unicodevector[name]
       if unicode then
         glyph.unicode=unicode
@@ -5380,6 +5410,11 @@ function mappings.addtounicode(data,filename)
           end
         end
       end
+      local r=overloads[unicode]
+      if r then
+        unicode=r.unicode
+        glyph.unicode=unicode
+      end
       if not unicode then
         missing[name]=true
       end
@@ -5763,6 +5798,7 @@ afm.syncspace=true
 afm.addligatures=true 
 afm.addtexligatures=true 
 afm.addkerns=true 
+local overloads=fonts.mappings.overloads
 local applyruntimefixes=fonts.treatments and fonts.treatments.applyfixes
 local function setmode(tfmdata,value)
   if value then
@@ -5777,15 +5813,6 @@ registerafmfeature {
     node=setmode,
   }
 }
-local remappednames={
-  ff={ name="f_f",unicode={ 0x66,0x66 } },
-  fi={ name="f_i",unicode={ 0x66,0x69 } },
-  fj={ name="f_j",unicode={ 0x66,0x6A } },
-  fk={ name="f_k",unicode={ 0x66,0x6B } },
-  fl={ name="f_l",unicode={ 0x66,0x6C } },
-  ffi={ name="f_f_i",unicode={ 0x66,0x66,0x69 } },
-  ffl={ name="f_f_l",unicode={ 0x66,0x66,0x6C } },
-}
 local comment=P("Comment")
 local spacing=patterns.spacer 
 local lineend=patterns.newline 
@@ -6078,12 +6105,13 @@ end
 fixnames=function(data)
   for k,v in next,data.descriptions do
     local n=v.name
-    local r=remappednames[n]
+    local r=overloads[n]
     if r then
+      local name=r.name
       if trace_indexing then
-        report_afm("renaming characters %a to %a",n,r.name)
+        report_afm("renaming characters %a to %a",n,name)
       end
-      v.name=r.name
+      v.name=name
       v.unicode=r.unicode
     end
   end
@@ -6915,7 +6943,7 @@ local report_otf=logs.reporter("fonts","otf loading")
 local fonts=fonts
 local otf=fonts.handlers.otf
 otf.glists={ "gsub","gpos" }
-otf.version=2.801 
+otf.version=2.802 
 otf.cache=containers.define("fonts","otf",otf.version,true)
 local fontdata=fonts.hashes.identifiers
 local chardata=characters and characters.data