12 files changed, 260 insertions, 53 deletions
diff --git a/tex/context/base/back-exp.lua b/tex/context/base/back-exp.lua
index 5ceb360dc..46ddc4f53 100644
--- a/tex/context/base/back-exp.lua
+++ b/tex/context/base/back-exp.lua
@@ -1066,7 +1066,7 @@ local function stopexport(v)
     if handle then
         report_export("saving xml data in '%s",xmlfile)
         handle:write(format(xmlpreamble,tex.jobname,os.date(),environment.version,version))
-        if cssfile then
+        if type(cssfile) == "string"  then
             local cssfiles = settings_to_array(cssfile)
             for i=1,#cssfiles do
                 local cssfile = cssfiles[i]
@@ -1099,7 +1099,8 @@ local function stopexport(v)
         io.savedata(cssfile,concat(templates,"\n\n"))
         -- xhtml references
         if xhtmlfile then
-            if type(v) ~= "string" or xhtmlfile == variables.yes or xhtmlfile == "" or xhtmlfile == xmlfile then
+            -- messy
+            if type(v) ~= "string" or xhtmlfile == true or xhtmlfile == variables.yes or xhtmlfile == "" or xhtmlfile == xmlfile then
                 xhtmlfile = file.replacesuffix(xmlfile,"xhtml")
             else
                 xhtmlfile = file.addsuffix(xhtmlfile,"xhtml")
diff --git a/tex/context/base/back-exp.mkiv b/tex/context/base/back-exp.mkiv
index 4682e8047..88272fca7 100644
--- a/tex/context/base/back-exp.mkiv
+++ b/tex/context/base/back-exp.mkiv
@@ -110,13 +110,18 @@
 \def\c!xhtml   {xhtml}
 
 \appendtoks
+    \doifsomething{\backendparameter\c!xhtml}
+      {\enabledirectives[backend.export.xhtml=\backendparameter\c!xhtml]}%
+    \doifsomething{\backendparameter\c!css}
+      {\enabledirectives[backend.export.css={\backendparameter\c!css}]}%
+\to \everysetupbackend % the xhtml and css directives are set on their own, independent of the export parameter
+
+\appendtoks
     \doifsomething{\backendparameter\c!export}
       {\setupstructure
          [\c!state=\v!start]%
        \enabledirectives
-         [backend.export=\backendparameter\c!export,%
-          backend.export.xhtml=\backendparameter\c!xhtml,%
-          backend.export.css={\backendparameter\c!css}]}%
+         [backend.export=\backendparameter\c!export]}% xhtml and css are no longer passed here; they have their own toks list entry
 \to \everysetupbackend
 
 \protect \endinput
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua
index 7f8c2db2f..c85bb3f49 100644
--- a/tex/context/base/char-ini.lua
+++ b/tex/context/base/char-ini.lua
@@ -386,10 +386,15 @@ local is_spacing = allocate ( table.tohash {
     "zs", "zl","zp",
 } )
 
+local is_mark = allocate ( table.tohash { -- set of category codes treated as marks; setcodes gives these an lccode
+    "mn", "ms", -- NOTE(review): unicode mark categories are mn, mc and me; "ms" looks like a typo for "mc", confirm against the category list
+} )
+
 characters.is_character = is_character
 characters.is_letter    = is_letter
 characters.is_command   = is_command
 characters.is_spacing   = is_spacing
+characters.is_mark      = is_mark
 
 local mt = { -- yes or no ?
     __index = function(t,k)
@@ -511,17 +516,6 @@ function characters.define(tobelettered, tobeactivated) -- catcodetables
             local contextname = chr.contextname
             if contextname then
                 local category = chr.category
---~                 if is_character[category] then
---~                     if chr.unicodeslot < 128 then
---~                         texprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u))
---~                     else
---~                         texprint(ctxcatcodes,format("\\let\\%s=%s",contextname,utfchar(u)))
---~                     end
---~                 elseif is_command[category] then
---~                     texsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}") -- no texprint
---~                     a = a + 1
---~                     activated[a] = u
---~                 end
                 if is_character[category] then
                     if chr.unicodeslot < 128 then
                         if is_letter[category] then
@@ -608,35 +602,32 @@ function characters.setcodes()
         report_defining("defining lc and uc codes")
     end
     for code, chr in next, data do
-        local cc = chr.category  -- mn lo
-        if cc == 'll' or cc == 'lu' or cc == 'lt' then
-            local lc, uc = chr.lccode, chr.uccode
-            if not lc then chr.lccode, lc = code, code end
-            if not uc then chr.uccode, uc = code, code end
-            texsetcatcode(code,11)   -- letter
-            if type(lc) == "table" then
-                lc = code
-            end
-            if type(uc) == "table" then
-                uc = code
-            end
-            texsetlccode(code,lc,uc)
-            if cc == "lu" then
-                texsetsfcode(code,999)
-            end
-        elseif cc == "lo" then
+        local cc = chr.category
+        if is_letter[cc] then
             local range = chr.range
             if range then
                 for i=range.first,range.last do
                     texsetcatcode(i,11) -- letter
                     texsetlccode(i,i,i) -- self self
                 end
-            else -- letter
-                texsetcatcode(code,11)
-                texsetlccode(code,code,code)
+            else
+                local lc, uc = chr.lccode, chr.uccode
+                if not lc then chr.lccode, lc = code, code end
+                if not uc then chr.uccode, uc = code, code end
+                texsetcatcode(code,11)   -- letter
+                if type(lc) == "table" then
+                    lc = code
+                end
+                if type(uc) == "table" then
+                    uc = code
+                end
+                texsetlccode(code,lc,uc)
+                if cc == "lu" then
+                    texsetsfcode(code,999)
+                end
             end
-        elseif cc == "mn" then -- mark
-            texsetlccode(code,code,code)
+        elseif is_mark[cc] then
+            texsetlccode(code,code,code) -- for hyphenation
         end
     end
 end
diff --git a/tex/context/base/char-utf.lua b/tex/context/base/char-utf.lua
index c509231e3..30124a6a2 100644
--- a/tex/context/base/char-utf.lua
+++ b/tex/context/base/char-utf.lua
@@ -20,7 +20,7 @@ over a string.</p>
 --ldx]]--
 
 local utfchar, utfbyte, utfgsub = utf.char, utf.byte, utf.gsub
-local concat, gmatch, gsub = table.concat, string.gmatch, string.gsub
+local concat, gmatch, gsub, find = table.concat, string.gmatch, string.gsub, string.find
 local utfcharacters, utfvalues = string.utfcharacters, string.utfvalues
 local allocate = utilities.storage.allocate
 
@@ -34,6 +34,9 @@ local characters      = characters
 characters.graphemes  = allocate()
 local graphemes       = characters.graphemes
 
+characters.decomposed = allocate()
+local decomposed      = characters.decomposed
+
 characters.mathpairs  = allocate()
 local mathpairs       = characters.mathpairs
 
@@ -48,13 +51,34 @@ local utffilters      = characters.filters.utf
 source code to depend on collapsing.</p>
 --ldx]]--
 
+-- defined here for the moment; these will become entries in char-def.lua
+
+local decomposed = allocate { -- maps one ligature/digraph character to the utf string of its components; shadows the empty table allocated earlier
+    ["Ĳ"] = "IJ",
+    ["ĳ"] = "ij",
+    ["և"] = "եւ",
+    ["ﬀ"] = "ff",
+    ["ﬁ"] = "fi",
+    ["ﬂ"] = "fl",
+    ["ﬃ"] = "ffi",
+    ["ﬄ"] = "ffl",
+    ["ﬅ"] = "ſt",
+    ["ﬆ"] = "st",
+    ["ﬓ"] = "մն",
+    ["ﬔ"] = "մե",
+    ["ﬕ"] = "մի",
+    ["ﬖ"] = "վն",
+    ["ﬗ"] = "մխ",
+}
+characters.decomposed = decomposed -- replaces the earlier empty table so the public field and the local refer to the same data
+
 local function initialize()
-    for k,v in next, characters.data do
+    for unicode, v in next, characters.data do
         -- using vs and first testing for length is faster (.02->.01 s)
         local vs = v.specials
         if vs and #vs == 3 and vs[1] == 'char' then
             local one, two = vs[2], vs[3]
-            local first, second, combined = utfchar(one), utfchar(two), utfchar(k)
+            local first, second, combined = utfchar(one), utfchar(two), utfchar(unicode)
             local cgf = graphemes[first]
             if not cgf then
                 cgf = { }
@@ -67,7 +91,7 @@ local function initialize()
                     mps = { }
                     mathpairs[two] = mps
                 end
-                mps[one] = k
+                mps[one] = unicode -- here unicode
                 local mps = mathpairs[second]
                 if not mps then
                     mps = { }
@@ -75,6 +99,26 @@ local function initialize()
                 end
                 mps[first] = combined
             end
+     -- else
+     --     local description = v.description
+     --     if find(description,"LIGATURE") then
+     --         if vs then
+     --             local t = { }
+     --             for i=2,#vs do
+     --                 t[#t+1] = utfchar(vs[i])
+     --             end
+     --             decomposed[utfchar(unicode)] = concat(t)
+     --         else
+     --             local vs = v.shcode
+     --             if vs then
+     --                 local t = { }
+     --                 for i=1,#vs do
+     --                     t[i] = utfchar(vs[i])
+     --                 end
+     --                 decomposed[utfchar(unicode)] = concat(t)
+     --             end
+     --         end
+     --     end
         end
     end
     initialize = false
@@ -164,6 +208,113 @@ not collecting tokens is not only faster but also saves garbage collecting.
 --ldx]]--
 
 -- lpeg variant is not faster
+--
+-- The commented variant below combines decomposing and collapsing in
+-- one loop; I might use it for the filter some day.
+
+--~ function utffilters.collapse(str) -- not really tested (we could preallocate a table)
+--~     if str and str ~= "" then
+--~         local nstr = #str
+--~         if nstr > 1 then
+--~             if initialize then -- saves a call
+--~                 initialize()
+--~             end
+--~             local tokens, t, first, done, n = { }, 0, false, false, 0
+--~             for second in utfcharacters(str) do
+--~                 local dec = decomposed[second]
+--~                 if dec then
+--~                     if not done then
+--~                         if n > 0 then
+--~                             for s in utfcharacters(str) do
+--~                                 if n == 1 then
+--~                                     break
+--~                                 else
+--~                                     t = t + 1
+--~                                     tokens[t] = s
+--~                                     n = n - 1
+--~                                 end
+--~                             end
+--~                         end
+--~                         done = true
+--~                     elseif first then
+--~                         t = t + 1
+--~                         tokens[t] = first
+--~                     end
+--~                     t = t + 1
+--~                     tokens[t] = dec
+--~                     first = false
+--~                 elseif done then
+--~                     local crs = high[second]
+--~                     if crs then
+--~                         if first then
+--~                             t = t + 1
+--~                             tokens[t] = first
+--~                         end
+--~                         first = crs
+--~                     else
+--~                         local cgf = graphemes[first]
+--~                         if cgf and cgf[second] then
+--~                             first = cgf[second]
+--~                         elseif first then
+--~                             t = t + 1
+--~                             tokens[t] = first
+--~                             first = second
+--~                         else
+--~                             first = second
+--~                         end
+--~                     end
+--~                 else
+--~                     local crs = high[second]
+--~                     if crs then
+--~                         for s in utfcharacters(str) do
+--~                             if n == 1 then
+--~                                 break
+--~                             else
+--~                                 t = t + 1
+--~                                 tokens[t] = s
+--~                                 n = n - 1
+--~                             end
+--~                         end
+--~                         if first then
+--~                             t = t + 1
+--~                             tokens[t] = first
+--~                         end
+--~                         first = crs
+--~                         done = true
+--~                     else
+--~                         local cgf = graphemes[first]
+--~                         if cgf and cgf[second] then
+--~                             for s in utfcharacters(str) do
+--~                                 if n == 1 then
+--~                                     break
+--~                                 else
+--~                                     t = t + 1
+--~                                     tokens[t] = s
+--~                                     n = n - 1
+--~                                 end
+--~                             end
+--~                             first = cgf[second]
+--~                             done = true
+--~                         else
+--~                             first = second
+--~                             n = n + 1
+--~                         end
+--~                     end
+--~                 end
+--~             end
+--~             if done then
+--~                 if first then
+--~                     t = t + 1
+--~                     tokens[t] = first
+--~                 end
+--~                 return concat(tokens) -- seldom called
+--~             end
+--~         elseif nstr > 0 then
+--~             return high[str] or str
+--~         end
+--~     end
+--~     return str
+--~ end
 
 function utffilters.collapse(str) -- not really tested (we could preallocate a table)
     if str and str ~= "" then
@@ -203,7 +354,7 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t
                             else
                                 t = t + 1
                                 tokens[t] = s
-                                n = n -1
+                                n = n - 1
                             end
                         end
                         if first then
@@ -221,7 +372,7 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t
                                 else
                                     t = t + 1
                                     tokens[t] = s
-                                    n = n -1
+                                    n = n - 1
                                 end
                             end
                             first = cgf[second]
@@ -234,8 +385,10 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t
                 end
             end
             if done then
-                t = t + 1
-                tokens[t] = first
+                if first then
+                    t = t + 1
+                    tokens[t] = first
+                end
                 return concat(tokens) -- seldom called
             end
         elseif nstr > 0 then
@@ -245,11 +398,61 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t
     return str
 end
 
+function utffilters.decompose(str) -- returns str with every character found in 'decomposed' replaced by its components
+    if str and str ~= "" then
+        local nstr = #str -- byte length; all keys in 'decomposed' are multibyte so one byte never matches
+        if nstr > 1 then
+         -- if initialize then -- saves a call
+         --     initialize()
+         -- end
+            local tokens, t, done, n = { }, 0, false, 0 -- n counts the characters seen before the first replacement
+            for s in utfcharacters(str) do
+                local dec = decomposed[s]
+                if dec then
+                    if not done then -- first hit: only now start building, so copy the untouched prefix
+                        if n > 0 then
+                            for c in utfcharacters(str) do
+                                if n == 0 then -- all n prefix characters copied (stopping at 1 dropped the last one)
+                                    break
+                                else
+                                    t = t + 1
+                                    tokens[t] = c
+                                    n = n - 1
+                                end
+                            end
+                        end
+                        done = true
+                    end
+                    t = t + 1
+                    tokens[t] = dec
+                elseif done then -- already building: pass the character through
+                    t = t + 1
+                    tokens[t] = s
+                else -- nothing replaced yet: just count, no table filling needed
+                    n = n + 1
+                end
+            end
+            if done then
+                return concat(tokens) -- seldom called
+            end
+        end
+    end
+    return str -- unchanged input (nil, empty, or nothing to decompose) is returned as is
+end
+
 local textfileactions = resolvers.openers.helpers.textfileactions
 
 utilities.sequencers.appendaction (textfileactions,"system","characters.filters.utf.collapse")
 utilities.sequencers.disableaction(textfileactions,"characters.filters.utf.collapse")
 
+utilities.sequencers.appendaction (textfileactions,"system","characters.filters.utf.decompose")
+utilities.sequencers.disableaction(textfileactions,"characters.filters.utf.decompose")
+
+function characters.filters.utf.enable() -- switches on both text file actions that are appended and then disabled just above
+    utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.collapse")
+    utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.decompose")
+end
+
 --[[ldx--
 <p>Next we implement some commands that are used in the user interface.</p>
 --ldx]]--
diff --git a/tex/context/base/char-utf.mkiv b/tex/context/base/char-utf.mkiv
index b59d2f569..261735656 100644
--- a/tex/context/base/char-utf.mkiv
+++ b/tex/context/base/char-utf.mkiv
@@ -30,9 +30,16 @@
 
 % resolvers.filters.install('utf',characters.filters.utf.collapse)
 
+% \appendtoks
+%     \ctxlua{
+%         local textfileactions = resolvers.openers.helpers.textfileactions
+%         utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.collapse")
+%         utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.decompose")
+%     }%
+% \to \everyjob
+
 \appendtoks
-    \ctxlua{utilities.sequencers.enableaction
-        (resolvers.openers.textfileactions,"characters.filters.utf.collapse")}%
+    \ctxlua{characters.filters.utf.enable()}%
 \to \everyjob
 
 %D The next one influences input parsing.
diff --git a/tex/context/base/cont-new.mkii b/tex/context/base/cont-new.mkii
index b4b8b50dc..90a6dd6cc 100644
--- a/tex/context/base/cont-new.mkii
+++ b/tex/context/base/cont-new.mkii
@@ -11,7 +11,7 @@
 %C therefore copyrighted by \PRAGMA. See mreadme.pdf for
 %C details.
 
-\newcontextversion{2011.05.05 12:10}
+\newcontextversion{2011.05.06 16:52}
 
 %D This file is loaded at runtime, thereby providing an
 %D excellent place for hacks, patches, extensions and new
diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv
index b1745d6ef..f85035371 100644
--- a/tex/context/base/cont-new.mkiv
+++ b/tex/context/base/cont-new.mkiv
@@ -11,7 +11,7 @@
 %C therefore copyrighted by \PRAGMA. See mreadme.pdf for
 %C details.
 
-\newcontextversion{2011.05.05 12:10}
+\newcontextversion{2011.05.06 16:52}
 
 %D This file is loaded at runtime, thereby providing an
 %D excellent place for hacks, patches, extensions and new
diff --git a/tex/context/base/context.mkii b/tex/context/base/context.mkii
index 574d9fa39..3459ff535 100644
--- a/tex/context/base/context.mkii
+++ b/tex/context/base/context.mkii
@@ -20,7 +20,7 @@
 %D your styles an modules.
 
 \edef\contextformat {\jobname}
-\edef\contextversion{2011.05.05 12:10}
+\edef\contextversion{2011.05.06 16:52}
 
 %D For those who want to use this:
 
diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv
index 39d1f3df4..c2b49422f 100644
--- a/tex/context/base/context.mkiv
+++ b/tex/context/base/context.mkiv
@@ -20,7 +20,7 @@
 %D your styles an modules.
 
 \edef\contextformat {\jobname}
-\edef\contextversion{2011.05.05 12:10}
+\edef\contextversion{2011.05.06 16:52}
 
 %D For those who want to use this:
 
diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf
index 30de4263a..5e38335cf 100644
--- a/tex/context/base/status-files.pdf
+++ b/tex/context/base/status-files.pdf
diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf
index b6a7cce99..c34365ed2 100644
--- a/tex/context/base/status-lua.pdf
+++ b/tex/context/base/status-lua.pdf
diff --git a/tex/generic/context/luatex-fonts-merged.lua b/tex/generic/context/luatex-fonts-merged.lua
index 11fc6e347..f6aeb6327 100644
--- a/tex/generic/context/luatex-fonts-merged.lua
+++ b/tex/generic/context/luatex-fonts-merged.lua
@@ -1,6 +1,6 @@
 -- merged file : luatex-fonts-merged.lua
 -- parent file : luatex-fonts.lua
--- merge date  : 05/05/11 12:10:55
+-- merge date  : 05/06/11 16:52:12
 
 do -- begin closure to overcome local limits and interference