13 files changed, 136 insertions, 49 deletions
diff --git a/tex/context/base/back-exp.mkiv b/tex/context/base/back-exp.mkiv
index c7696d383..7fd1b5799 100644
--- a/tex/context/base/back-exp.mkiv
+++ b/tex/context/base/back-exp.mkiv
@@ -113,9 +113,9 @@
     \unexpanded\def\dotagsetnotesymbol{\taggedctxcommand{settagdescriptionsymbol("\currentnote",\currentnotenumber)}}%
 \to \everyenableelements
 
-\appendtoks
-    \unexpanded\def\doverbatimspace{\char32\relax}% will be done permanently
-\to \everyenableelements
+% \appendtoks
+%     \unexpanded\def\doverbatimspace{\asciispacechar}% will be done permanently
+% \to \everyenableelements
 
 % The action: \setupbackend[export=yes] % or filename
 
diff --git a/tex/context/base/buff-ver.mkiv b/tex/context/base/buff-ver.mkiv
index 10002c4a7..7a4f28253 100644
--- a/tex/context/base/buff-ver.mkiv
+++ b/tex/context/base/buff-ver.mkiv
@@ -376,6 +376,15 @@
 \unexpanded\def\specialstretchedspace{\hskip.5\interwordspace\s!plus.125\interwordspace\relax} % \interwordstretch can be zero
 \unexpanded\def\specialcontrolspace  {\hskip\zeropoint\hbox{\normalcontrolspace}\hskip\zeropoint\relax}
 
+% \unexpanded\def\taggedspecialfixedspace    {\hskip\zeropoint\asciispacechar\hskip\zeropoint}
+% \unexpanded\def\taggedspecialobeyedspace   {\hskip\zeropoint\asciispacechar\hskip\zeropoint}
+% \unexpanded\def\taggedspecialstretchedspace{\hskip\zeropoint\asciispacechar\hskip\zeropoint}
+% \unexpanded\def\taggedspecialcontrolspace  {\hskip\zeropoint\hbox{\normalcontrolspace}\hskip\zeropoint\relax}
+
+\appendtoks
+    \unexpanded\def\obeyedspace{\hskip\zeropoint\asciispacechar\hskip\zeropoint}%
+\to \everyenableelements
+
 \unexpanded\def\obeyhyphens
   {\let\obeyedspace \specialobeyedspace % maybe \specialstretchedspace
    \let\controlspace\specialcontrolspace
@@ -873,7 +882,7 @@
 \newcount      \c_buff_verbatim_current
 \newconditional\c_buff_optimize_linebreaks
 
-\def\doverbatimspace           {\obeyedspace}
+           \def\doverbatimspace           {\obeyedspace}
 
 \unexpanded\def\doinlineverbatimstart     {}
 \unexpanded\def\doinlineverbatimstop      {}
diff --git a/tex/context/base/char-act.mkiv b/tex/context/base/char-act.mkiv
index 011c29d07..7d7268c8b 100644
--- a/tex/context/base/char-act.mkiv
+++ b/tex/context/base/char-act.mkiv
@@ -24,6 +24,8 @@
 %D \NEWLINE\ and \NEWPAGE\ active and assigning them
 %D \type{\obeysomething}, but first we set some default values.
 
+% These are expandable!
+
 \def\obeyedspace {\space}
 \def\obeyedtab   {\obeyedspace}
 \def\obeyedline  {\par}
@@ -36,7 +38,10 @@
 %D spaces (control spaces) we only have to adapt the definition
 %D of \type{\obeyedspace} to:
 
-\unexpanded\def\controlspace{\hbox{\char32}} % rather tex, we need the unicode value
+\chardef\asciispacechar\spaceasciicode % a real space character
+
+\unexpanded\def\naturalspace{\asciispacechar}
+\unexpanded\def\controlspace{\hbox{\asciispacechar}}               % rather tex, we need the unicode value
 \unexpanded\def\normalspaces{\catcode\spaceasciicode\spacecatcode}
 
 \bgroup
diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv
index 11d62d298..2e16f0d01 100644
--- a/tex/context/base/cont-new.mkiv
+++ b/tex/context/base/cont-new.mkiv
@@ -11,7 +11,7 @@
 %C therefore copyrighted by \PRAGMA. See mreadme.pdf for
 %C details.
 
-\newcontextversion{2014.05.30 23:26}
+\newcontextversion{2014.06.01 13:44}
 
 %D This file is loaded at runtime, thereby providing an excellent place for
 %D hacks, patches, extensions and new features.
diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf
index a3983ea30..2b7bdd083 100644
--- a/tex/context/base/context-version.pdf
+++ b/tex/context/base/context-version.pdf
diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv
index 22a770b9d..c56ce4e55 100644
--- a/tex/context/base/context.mkiv
+++ b/tex/context/base/context.mkiv
@@ -28,7 +28,7 @@
 %D up and the dependencies are more consistent.
 
 \edef\contextformat {\jobname}
-\edef\contextversion{2014.05.30 23:26}
+\edef\contextversion{2014.06.01 13:44}
 \edef\contextkind   {beta}
 
 %D For those who want to use this:
diff --git a/tex/context/base/mult-low.lua b/tex/context/base/mult-low.lua
index 2bae5a0b0..faa1302a9 100644
--- a/tex/context/base/mult-low.lua
+++ b/tex/context/base/mult-low.lua
@@ -126,7 +126,7 @@ return {
         "twoperemspace", "threeperemspace", "fourperemspace", "fiveperemspace", "sixperemspace",
         "figurespace", "punctuationspace", "hairspace",
         "zerowidthspace", "zerowidthnonjoiner", "zerowidthjoiner", "zwnj", "zwj",
-        "optionalspace",
+        "optionalspace", "asciispacechar",
     },
     ["helpers"] = {
         --
@@ -241,7 +241,7 @@ return {
         "removetoks", "appendtoks", "prependtoks", "appendtotoks", "prependtotoks", "to",
         --
         "endgraf", "endpar", "everyendpar", "reseteverypar", "finishpar", "empty", "null", "space", "quad", "enspace", "nbsp",
-        "obeyspaces", "obeylines", "obeyedspace", "obeyedline",
+        "obeyspaces", "obeylines", "obeyedspace", "obeyedline", "obeyedtab", "obeyedpage",
         "normalspace",
         --
         "executeifdefined",
diff --git a/tex/context/base/sort-ini.lua b/tex/context/base/sort-ini.lua
index 42d83188e..d1eaacd15 100644
--- a/tex/context/base/sort-ini.lua
+++ b/tex/context/base/sort-ini.lua
@@ -39,11 +39,18 @@ relatively easy to do.</p>
 how they map onto this mechanism. I've learned that users can come up
 with any demand so nothing here is frozen.</p>
 
+<p>Todo: I ran into the Unicode Collation document and noticed that
+there are some similarities (like the weights) but using that method
+would still demand extra code for language specifics. One option is
+to use the allkeys.txt file for the uc vectors but then we would also
+use the collapsed key (sq, code is now commented out). In fact, we could
+just hook those into the replacer code that we run beforehand.</p>
+
 <p>In the future index entries will become more clever, i.e. they will
 have language etc properties that then can be used.</p>
 ]]--
 
-local gsub, rep, sub, sort, concat = string.gsub, string.rep, string.sub, table.sort, table.concat
+local gsub, rep, sub, sort, concat, tohash, format = string.gsub, string.rep, string.sub, table.sort, table.concat, table.tohash, string.format
 local utfbyte, utfchar, utfcharacters, utfvalues = utf.byte, utf.char, utf.characters, utf.values
 local next, type, tonumber, rawget, rawset = next, type, tonumber, rawget, rawset
 
@@ -52,6 +59,7 @@ local setmetatableindex = table.setmetatableindex
 
 local trace_tests       = false  trackers.register("sorters.tests",   function(v) trace_tests   = v end)
 local trace_methods     = false  trackers.register("sorters.methods", function(v) trace_methods = v end)
+local trace_orders      = false  trackers.register("sorters.orders",  function(v) trace_orders  = v end)
 
 local report_sorters    = logs.reporter("languages","sorters")
 
@@ -65,7 +73,9 @@ local digitsoffset      = 0x20000 -- frozen
 local digitsmaximum     = 0xFFFFF -- frozen
 
 local lccodes           = characters.lccodes
+local uccodes           = characters.uccodes
 local lcchars           = characters.lcchars
+local ucchars           = characters.ucchars
 local shchars           = characters.shchars
 local fscodes           = characters.fscodes
 local fschars           = characters.fschars
@@ -81,7 +91,7 @@ local v_after           = variables.after
 local v_first           = variables.first
 local v_last            = variables.last
 
-local validmethods      = table.tohash {
+local validmethods      = tohash {
     "ch", -- raw character (for tracing)
     "mm", -- minus mapping
     "zm", -- zero  mapping
@@ -169,12 +179,12 @@ local function preparetables(data)
         __index = function(t,k)
             local n, nn
             if k then
-                if trace_tests then
+                if trace_orders then
                     report_sorters("simplifing character %C",k)
                 end
                 local l = lower[k] or lcchars[k]
                 if l then
-                    if trace_tests then
+                    if trace_orders then
                         report_sorters(" 1 lower: %C",l)
                     end
                     local ml = rawget(t,l)
@@ -185,7 +195,7 @@ local function preparetables(data)
                             nn = nn + 1
                             n[nn] = ml[i] + (t.__delta or 0)
                         end
-                        if trace_tests then
+                        if trace_orders then
                             report_sorters(" 2 order: % t",n)
                         end
                     end
@@ -193,7 +203,7 @@ local function preparetables(data)
                 if not n then
                     local s = shchars[k] -- maybe all components?
                     if s and s ~= k then
-                        if trace_tests then
+                        if trace_orders then
                             report_sorters(" 3 shape: %C",s)
                         end
                         n = { }
@@ -201,7 +211,7 @@ local function preparetables(data)
                         for l in utfcharacters(s) do
                             local ml = rawget(t,l)
                             if ml then
-                                if trace_tests then
+                                if trace_orders then
                                     report_sorters(" 4 keep: %C",l)
                                 end
                                 if ml then
@@ -213,7 +223,7 @@ local function preparetables(data)
                             else
                                 l = lower[l] or lcchars[l]
                                 if l then
-                                    if trace_tests then
+                                    if trace_orders then
                                         report_sorters(" 5 lower: %C",l)
                                     end
                                     local ml = rawget(t,l)
@@ -232,7 +242,7 @@ local function preparetables(data)
                      --
                      -- s = fschars[k]
                      -- if s and s ~= k then
-                     --     if trace_tests then
+                     --     if trace_orders then
                      --         report_sorters(" 6 split: %s",s)
                      --     end
                      --     local ml = rawget(t,s)
@@ -247,24 +257,24 @@ local function preparetables(data)
                      -- end
                         local b = utfbyte(k)
                         n = decomposed[b] or { b }
-                        if trace_tests then
+                        if trace_orders then
                             report_sorters(" 6 split: %s",utf.tostring(b)) -- todo
                         end
                     end
                     if n then
-                        if trace_tests then
+                        if trace_orders then
                             report_sorters(" 7 order: % t",n)
                         end
                     else
                         n = noorder
-                        if trace_tests then
+                        if trace_orders then
                             report_sorters(" 8 order: 0")
                         end
                     end
                 end
             else
                 n = noorder
-                if trace_tests then
+                if trace_orders then
                     report_sorters(" 9 order: 0")
                 end
             end
@@ -334,8 +344,8 @@ local function setlanguage(l,m,d,u)
             report_sorters("invalid sorter method %a in %a",s,method)
         end
     end
+    usedinsequence = tohash(sequence)
     data.sequence = sequence
-    usedinsequence = table.tohash(sequence)
     data.usedinsequence = usedinsequence
 -- usedinsequence.ch = true -- better just store the string
     if trace_tests then
@@ -387,7 +397,6 @@ local function basic(a,b) -- trace ea and eb
         for j=1,#sequence do
             local m = sequence[j]
             result = basicsort(ea[m],eb[m])
--- print(m,result)
             if result ~= 0 then
                 return result
             end
@@ -439,6 +448,36 @@ local function basic(a,b) -- trace ea and eb
     end
 end
 
+-- if we use sq:
+--
+-- local function basic(a,b) -- trace ea and eb
+--     local ea, eb = a.split, b.split
+--     local na, nb = #ea, #eb
+--     if na == 0 and nb == 0 then
+--         -- simple variant (single word)
+--         return basicsort(ea.sq,eb.sq)
+--     else
+--         -- complex variant, used in register (multiple words)
+--         local result = 0
+--         for i=1,nb < na and nb or na do
+--             local eai, ebi = ea[i], eb[i]
+--             result = basicsort(eai.sq,ebi.sq)
+--             if result ~= 0 then
+--                 return result
+--             end
+--         end
+--         if result ~= 0 then
+--             return result
+--         elseif na > nb then
+--             return 1
+--         elseif nb > na then
+--             return -1
+--         else
+--             return 0
+--         end
+--     end
+-- end
+
 comparers.basic = basic
 
 function sorters.basicsorter(a,b)
@@ -531,10 +570,15 @@ function splitters.utf(str,checked) -- we could append m and u but this is clean
         else
             n = n + 1
             local l = lower[sc]
-            l = l and utfbyte(l) or lccodes[b]
+            l = l and utfbyte(l) or lccodes[b] or b
+         -- local u = upper[sc]
+         -- u = u and utfbyte(u) or uccodes[b] or b
             if type(l) == "table" then
                 l = l[1] -- there are currently no tables in lccodes but it can be some, day
             end
+         -- if type(u) == "table" then
+         --     u = u[1] -- there are currently no tables in uccodes but there may be some day
+         -- end
             z_case[n] = l
             if l ~= b then
                 m_case[n] = l - 1
@@ -593,9 +637,9 @@ function splitters.utf(str,checked) -- we could append m and u but this is clean
     --         p_mapping = { p_mappings[fs][1] }
     --     end
     -- end
-
+    local result
     if checked then
-        return {
+        result = {
             ch = trace_tests       and char      or nil, -- not in sequence
             uc = usedinsequence.uc and byte      or nil,
             mc = usedinsequence.mc and m_case    or nil,
@@ -606,7 +650,7 @@ function splitters.utf(str,checked) -- we could append m and u but this is clean
             pm = usedinsequence.pm and p_mapping or nil,
         }
     else
-        return {
+        result = {
             ch = char,
             uc = byte,
             mc = m_case,
@@ -617,7 +661,15 @@ function splitters.utf(str,checked) -- we could append m and u but this is clean
             pm = p_mapping,
         }
     end
-
+ -- local sq, n = { }, 0
+ -- for i=1,#byte do
+ --     for s=1,#sequence do
+ --         n = n + 1
+ --         sq[n] = result[sequence[s]][i]
+ --     end
+ -- end
+ -- result.sq = sq
+    return result
 end
 
 local function packch(entry)
@@ -648,11 +700,11 @@ local function packuc(entry)
     if #split > 0 then -- useless test
         local t = { }
         for i=1,#split do
-            t[i] = concat(split[i].uc, " ")
+            t[i] = concat(split[i].uc, " ") -- sq
         end
         return concat(t," + ")
     else
-        return concat(split.uc," ")
+        return concat(split.uc," ") -- sq
     end
 end
 
diff --git a/tex/context/base/spac-ver.lua b/tex/context/base/spac-ver.lua
index 018881663..55c135cf6 100644
--- a/tex/context/base/spac-ver.lua
+++ b/tex/context/base/spac-ver.lua
@@ -879,6 +879,8 @@ local special_penalty_xxx =     0
 -- header don't break but also make sure that we have at least a decent
 -- break when we have succesive ones (often when testing)
 
+-- todo: mark headers as such so that we can recognize them
+
 local specialmethods = { }
 local specialmethod  = 1
 
@@ -927,10 +929,21 @@ specialmethods[1] = function(start,penalty)
                         return
                     end
                 elseif trace_specials then
-                    report_specials("  context %a, higher level, continue",p)
+                    report_specials("  context penalty %a, higher level, continue",p)
+                end
+            else
+                local p = getfield(current,"penalty")
+                if p < 10000 then
+                    -- assume some other mechanism kicks in so we seem to have content
+                    if trace_specials then
+                        report_specials("  regular penalty %a, quitting",p)
+                    end
+                    break
+                else
+                    if trace_specials then
+                        report_specials("  regular penalty %a, continue",p)
+                    end
                 end
-            elseif trace_specials then
-                report_specials("  regular penalty, continue")
             end
         end
         current = getprev(current)
diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf
index 55046b375..9c73215cc 100644
--- a/tex/context/base/status-files.pdf
+++ b/tex/context/base/status-files.pdf
diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf
index c1435146e..b43a62bf2 100644
--- a/tex/context/base/status-lua.pdf
+++ b/tex/context/base/status-lua.pdf
diff --git a/tex/context/base/strc-syn.lua b/tex/context/base/strc-syn.lua
index e27974eb2..2ca428455 100644
--- a/tex/context/base/strc-syn.lua
+++ b/tex/context/base/strc-syn.lua
@@ -139,23 +139,26 @@ function synonyms.sort(data,options)
     sorters.sort(data.result,synonyms.compare)
 end
 
-function synonyms.finalize(data,options)
+function synonyms.finalize(data,options) -- mostly the same as registers so we will generalize it: sorters.split
     local result = data.result
     data.metadata.nofsorted = #result
-    local split = { }
+    local split, nofsplit, lasttag, done, nofdone = { }, 0, nil, nil, 0
+    local firstofsplit = sorters.firstofsplit
     for k=1,#result do
         local v = result[k]
         local entry, tag = firstofsplit(v)
-        local s = split[entry] -- keeps track of change
-        local d
-        if not s then
-            d = { }
-            s = { tag = tag, data = d }
-            split[entry] = s
-        else
-            d = s.data
+        if tag ~= lasttag then
+         -- if trace_registers then
+         --     report_registers("splitting at %a",tag)
+         -- end
+            done     = { }
+            nofdone  = 0
+            nofsplit = nofsplit + 1
+            lasttag  = tag
+            split[nofsplit] = { tag = tag, data = done }
         end
-        d[#d+1] = v
+        nofdone = nofdone + 1
+        done[nofdone] = v
     end
     data.result = split
 end
@@ -168,10 +171,9 @@ local ctx_synonymentry = context.synonymentry
 function synonyms.flush(data,options)
     local kind = data.metadata.kind -- hack, will be done better
     local result = data.result
-    local sorted = table.sortedkeys(result)
-    for k=1,#sorted do
-        local letter = sorted[k]
-        local sublist = result[letter]
+    for i=1,#result do
+        local sublist = result[i]
+        local letter = sublist.tag
         local data = sublist.data
         for d=1,#data do
             local entry = data[d].definition
diff --git a/tex/context/base/strc-tag.mkiv b/tex/context/base/strc-tag.mkiv
index 6e792fd3f..7e15be4a3 100644
--- a/tex/context/base/strc-tag.mkiv
+++ b/tex/context/base/strc-tag.mkiv
@@ -11,6 +11,7 @@
 %C therefore copyrighted by \PRAGMA. See mreadme.pdf for
 %C details.
 
+% labels: no language needed
 % key/values and other names might change (and probably will)
 
 \writestatus{loading}{ConTeXt Structure Macros / Tags}
@@ -176,6 +177,11 @@
      \expandafter\strc_tags_element_stop_yes
    \fi}
 
+% if mainlanguage == en we can even omit the label (default to tag) which is faster
+%
+% \unexpanded\def\strc_tags_element_start_yes_indeed_yes[#1][#2]%
+%   {\ctxcommand{starttag("#1",{label="#1",userdata=\!!bs#2\!!es})}}
+
 \unexpanded\def\strc_tags_element_start_yes_indeed_yes[#1][#2]%
   {\ctxcommand{starttag("#1",{label="\dogetupsometaglabeltext{#1}",userdata=\!!bs#2\!!es})}}