2014-06-27 10:56:00

author: Context Git Mirror Bot <phg42.2a@gmail.com> 2014-06-27 11:15:04 +0200
committer: Context Git Mirror Bot <phg42.2a@gmail.com> 2014-06-27 11:15:04 +0200
commit: 8e8dd7540025b45c0fcb5687bcfeb12928b74426 (patch)
tree: 5651f85e59efdf2c9c890bb1d51a6b5c9058e86b /tex
parent: 6d09de5c379629ea7259803ae28185949cda27c8 (diff)
download: context-8e8dd7540025b45c0fcb5687bcfeb12928b74426.tar.gz
10 files changed, 233 insertions, 93 deletions
diff --git a/tex/context/base/char-utf.lua b/tex/context/base/char-utf.lua
index 46235c4e9..98a780dcd 100644
--- a/tex/context/base/char-utf.lua
+++ b/tex/context/base/char-utf.lua
@@ -547,23 +547,29 @@ end
 
 local p_reorder = nil
 
-local sorter = function(a,b) return b[2] < a[2] end
+-- local sorter = function(a,b) return b[2] < a[2] end
+--
+-- local function swapper(s,p,t)
+--     local old = { }
+--     for i=1,#t do
+--         old[i] = t[i][1]
+--     end
+--     old = concat(old)
+--     sort(t,sorter)
+--     for i=1,#t do
+--         t[i] = t[i][1]
+--     end
+--     local new = concat(t)
+--     if old ~= new then
+--         print("reordered",old,"->",new)
+--     end
+--     return p, new
+-- end
 
-local function swapper(s,p,t)
-    local old = { }
-    for i=1,#t do
-        old[i] = t[i][1]
-    end
-    old = concat(old)
-    sort(t,sorter)
-    for i=1,#t do
-        t[i] = t[i][1]
-    end
-    local new = concat(t)
-    if old ~= new then
-        print("reordered",old,"->",new)
-    end
-    return p, new
+-- -- the next one is not stable for similar weights
+
+local sorter = function(a,b)
+    return b[2] < a[2]
 end
 
 local function swapper(s,p,t)
@@ -574,16 +580,48 @@ local function swapper(s,p,t)
     return p, concat(t)
 end
 
+-- -- the next one keeps similar weights in the original order
+--
+-- local sorter = function(a,b)
+--     local b2, a2 = b[2], a[2]
+--     if a2 == b2 then
+--         return b[3] > a[3]
+--     else
+--         return b2 < a2
+--     end
+-- end
+--
+-- local function swapper(s,p,t)
+--     for i=1,#t do
+--         t[i][3] = i
+--     end
+--     sort(t,sorter)
+--     for i=1,#t do
+--         t[i] = t[i][1]
+--     end
+--     return p, concat(t)
+-- end
+
+-- at some point exceptions will become an option, for now it's an experiment
+-- to overcome bugs (that have become features) in unicode .. or we might decide
+-- for an extra ordering key in char-def that takes precedence over combining
+
+local exceptions = {
+    -- frozen unicode bug
+    ["َّ"] = "َّ", -- U+64E .. U+651 => U+651 .. U+64E
+}
+
 local function prepare()
     local hash = { }
     for k, v in sortedhash(characters.data) do
-        local combining = v.combining
+        local combining = v.combining -- v.ordering or v.combining
         if combining then
-            hash[utfchar(k)] = { utfchar(k), combining }
+            hash[utfchar(k)] = { utfchar(k), combining, 0 } -- slot 3 can be used in sort
         end
     end
+    local e = utfchartabletopattern(keys(exceptions))
     local p = utfchartabletopattern(keys(hash))
-    p_reorder = Cs((Cmt(Ct((p/hash)^2),swapper) + p_utf8character)^0) * P(-1)
+    p_reorder = Cs((e/exceptions + Cmt(Ct((p/hash)^2),swapper) + p_utf8character)^0) * P(-1)
 end
 
 function utffilters.reorder(str)
diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv
index 0d7fa9a0b..53a57ef9c 100644
--- a/tex/context/base/cont-new.mkiv
+++ b/tex/context/base/cont-new.mkiv
@@ -11,7 +11,7 @@
 %C therefore copyrighted by \PRAGMA. See mreadme.pdf for
 %C details.
 
-\newcontextversion{2014.06.26 12:08}
+\newcontextversion{2014.06.27 10:53}
 
 %D This file is loaded at runtime, thereby providing an excellent place for
 %D hacks, patches, extensions and new features.
diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf
index ba240100d..303d0d600 100644
--- a/tex/context/base/context-version.pdf
+++ b/tex/context/base/context-version.pdf
diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv
index 735023acf..b936c288d 100644
--- a/tex/context/base/context.mkiv
+++ b/tex/context/base/context.mkiv
@@ -28,7 +28,7 @@
 %D up and the dependencies are more consistent.
 
 \edef\contextformat {\jobname}
-\edef\contextversion{2014.06.26 12:08}
+\edef\contextversion{2014.06.27 10:53}
 \edef\contextkind   {beta}
 
 %D For those who want to use this:
diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf
index 8f4df1cf6..8eee56299 100644
--- a/tex/context/base/status-files.pdf
+++ b/tex/context/base/status-files.pdf
diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf
index 775d7b49d..7541cdaba 100644
--- a/tex/context/base/status-lua.pdf
+++ b/tex/context/base/status-lua.pdf
diff --git a/tex/context/base/typo-tal.lua b/tex/context/base/typo-tal.lua
index eb50fdda7..5d62d4e47 100644
--- a/tex/context/base/typo-tal.lua
+++ b/tex/context/base/typo-tal.lua
@@ -8,11 +8,16 @@ if not modules then modules = { } end modules ['typo-tal'] = {
 
 -- I'll make it a bit more efficient and provide named instances too which is needed for
 -- nested tables.
+--
+-- Currently we have two methods: text and number with some downward compatible
+-- defaulting.
 
 local next, type = next, type
 local div = math.div
 local utfbyte = utf.byte
 
+local splitmethod          = utilities.parsers.splitmethod
+
 local nodecodes            = nodes.nodecodes
 local glyph_code           = nodecodes.glyph
 local glue_code            = nodecodes.glue
@@ -21,6 +26,10 @@ local fontcharacters       = fonts.hashes.characters
 local unicodes             = fonts.hashes.unicodes
 local categories           = characters.categories -- nd
 
+local variables            = interfaces.variables
+local v_text               = variables.text
+local v_number             = variables.number
+
 local nuts                 = nodes.nuts
 local tonut                = nuts.tonut
 local tonode               = nuts.tonode
@@ -118,82 +127,123 @@ function characteralign.handler(originalhead,where)
     --
     local validseparators = dataset.separators
     local validsigns      = dataset.signs
+    local method          = dataset.method
     -- we can think of constraints
-    while current do
-        local id = getid(current)
-        if id == glyph_code then
-            local char = getchar(current)
-            local font = getfont(current)
-            local unicode = unicodes[font][char]
-            if not unicode then
-                -- no unicode so forget about it
-            elseif unicode == separator then
-                c = current
-                if trace_split then
-                    setcolor(current,"darkred")
-                end
-                dataset.hasseparator = true
-            elseif categories[unicode] == "nd" or validseparators[unicode] then
-                if c then
-                    if not a_start then
-                        a_start = current
-                    end
-                    a_stop = current
+    if method == v_number then
+        while current do
+            local id = getid(current)
+            if id == glyph_code then
+                local char = getchar(current)
+                local font = getfont(current)
+                local unicode = unicodes[font][char]
+                if not unicode then
+                    -- no unicode so forget about it
+                elseif unicode == separator then
+                    c = current
                     if trace_split then
-                        setcolor(current,validseparators[unicode] and "darkcyan" or "darkblue")
+                        setcolor(current,"darkred")
                     end
-                else
-                    if not b_start then
-                        if sign then
-                            b_start = sign
-                            local new = validsigns[getchar(sign)]
-                            if char == new or not fontcharacters[getfont(sign)][new] then
-                                if trace_split then
-                                    setcolor(sign,"darkyellow")
+                    dataset.hasseparator = true
+                elseif categories[unicode] == "nd" or validseparators[unicode] then
+                    if c then
+                        if not a_start then
+                            a_start = current
+                        end
+                        a_stop = current
+                        if trace_split then
+                            setcolor(current,validseparators[unicode] and "darkcyan" or "darkblue")
+                        end
+                    else
+                        if not b_start then
+                            if sign then
+                                b_start = sign
+                                local new = validsigns[getchar(sign)]
+                                if char == new or not fontcharacters[getfont(sign)][new] then
+                                    if trace_split then
+                                        setcolor(sign,"darkyellow")
+                                    end
+                                else
+                                    setfield(sign,"char",new)
+                                    if trace_split then
+                                        setcolor(sign,"darkmagenta")
+                                    end
                                 end
+                                sign = nil
+                                b_stop = current
                             else
-                                setfield(sign,"char",new)
-                                if trace_split then
-                                    setcolor(sign,"darkmagenta")
-                                end
+                                b_start = current
+                                b_stop = current
                             end
-                            sign = nil
-                            b_stop = current
                         else
-                            b_start = current
                             b_stop = current
                         end
-                    else
-                        b_stop = current
+                        if trace_split and current ~= sign then
+                            setcolor(current,validseparators[unicode] and "darkcyan" or "darkblue")
+                        end
                     end
-                    if trace_split and current ~= sign then
-                        setcolor(current,validseparators[unicode] and "darkcyan" or "darkblue")
+                elseif not b_start then
+                    sign = validsigns[unicode] and current
+                 -- if trace_split then
+                 --     setcolor(current,"darkgreen")
+                 -- end
+                end
+            elseif (b_start or a_start) and id == glue_code then
+                -- maybe only in number mode
+                -- somewhat inefficient
+                local next = getnext(current)
+                local prev = getprev(current)
+                if next and prev and getid(next) == glyph_code and getid(prev) == glyph_code then -- too much checking
+                    local width = fontcharacters[getfont(b_start)][separator or period].width
+                 -- local spec = getfield(current,"spec")
+                 -- free_spec(spec)
+                    setfield(current,"spec",new_gluespec(width))
+                    setattr(current,a_character,punctuationspace)
+                    if a_start then
+                        a_stop = current
+                    elseif b_start then
+                        b_stop = current
                     end
                 end
-            elseif not b_start then
-                sign = validsigns[unicode] and current
-             -- if trace_split then
-             --     setcolor(current,"darkgreen")
-             -- end
             end
-        elseif (b_start or a_start) and id == glue_code then
-            -- somewhat inefficient
-            local next = getnext(current)
-            local prev = getprev(current)
-            if next and prev and getid(next) == glyph_code and getid(prev) == glyph_code then -- too much checking
-                local width = fontcharacters[getfont(b_start)][separator or period].width
-             -- local spec = getfield(current,"spec")
-             -- free_spec(spec)
-                setfield(current,"spec",new_gluespec(width))
-                setattr(current,a_character,punctuationspace)
-                if a_start then
-                    a_stop = current
-                elseif b_start then
-                    b_stop = current
+            current = getnext(current)
+        end
+    else
+        while current do
+            local id = getid(current)
+            if id == glyph_code then
+                local char = getchar(current)
+                local font = getfont(current)
+                local unicode = unicodes[font][char]
+                if not unicode then
+                    -- no unicode so forget about it
+                elseif unicode == separator then
+                    c = current
+                    if trace_split then
+                        setcolor(current,"darkred")
+                    end
+                    dataset.hasseparator = true
+                else
+                    if c then
+                        if not a_start then
+                            a_start = current
+                        end
+                        a_stop = current
+                        if trace_split then
+                            setcolor(current,"darkgreen")
+                        end
+                    else
+                        if not b_start then
+                            b_start = current
+                        end
+                        b_stop = current
+                        if trace_split then
+                            setcolor(current,"darkblue")
+                        end
+                    end
                 end
             end
+            current = getnext(current)
         end
-        current = getnext(current)
     end
     local entry = list[row]
     if entry then
@@ -287,17 +337,28 @@ function setcharacteralign(column,separator)
     end
     local dataset = datasets[column] -- we can use a metatable
     if not dataset then
-        separator  = separator and utfbyte(separator) or comma
-        local auto = validseparators[separator]
+        local method, token
+        if separator then
+            method, token = splitmethod(separator)
+            if method and token then
+                separator = utfbyte(token) or comma
+            else
+                separator = utfbyte(separator) or comma
+                method    = validseparators[separator] and v_number or v_text
+            end
+        else
+            separator = comma
+            method    = v_number
+        end
         dataset = {
             separator  = separator,
             list       = { },
             maxafter   = 0,
             maxbefore  = 0,
             collected  = false,
-            mode       = auto and "numeric",
-            separators = auto and validseparators or { [separator] = true },
-            signs      = auto and validsigns or { },
+            method     = method,
+            separators = validseparators,
+            signs      = validsigns,
         }
         datasets[column] = dataset
         used = true
diff --git a/tex/context/base/typo-tal.mkiv b/tex/context/base/typo-tal.mkiv
index 11a5e381f..126233b1a 100644
--- a/tex/context/base/typo-tal.mkiv
+++ b/tex/context/base/typo-tal.mkiv
@@ -59,6 +59,7 @@
 \unexpanded\def\signalcharacteralign#1#2{\attribute\characteralignattribute=\numexpr#1*\plushundred+#2\relax}
 \unexpanded\def\setcharacteralign   #1#2{\ctxcommand{setcharacteralign(\number#1,"#2")}}
 \unexpanded\def\resetcharacteralign     {\ctxcommand{resetcharacteralign()}}
+\unexpanded\def\nocharacteralign        {\attribute\characteralignattribute\attributeunsetvalue}
 
 %D Mostly downward compatible:
 %D
@@ -73,6 +74,15 @@
 %D
 %D \typebuffer \blank \getbuffer \blank
 
+%D We have (currently) two modes: \type {text} and \type {number}. The handler tries
+%D to determine the mode automatically. When using periods and commas as separators
+%D the \type {number} mode is chosen. If you use for instance a \type {-} as
+%D separator, \type {text} is chosen, but you can enforce \type {number} with \type
+%D {number->-} (as with other mechanisms, the arrow indicates a method to apply).
+%D
+%D One can use \type {\nocharacteralign} to disable this mechanism, for instance in
+%D a table cell.
+
 \def\alignmentcharacter{,}
 
 \unexpanded\def\typo_charalign_pass_one
@@ -86,19 +96,26 @@
 \def\typo_charalign_pass
   {\hbox\bgroup\signalcharacteralign\plusone\scratchcounter\let\next}
 
-\unexpanded\def\startcharacteralign#1\stopcharacteralign
+\unexpanded\def\startcharacteralign
+  {\dosingleempty\typo_charalign_start}
+
+\def\typo_charalign_start[#1]#2\stopcharacteralign
   {\bgroup
+   \edef\m_temp{#1}%
+   \ifx\m_temp\empty \else
+     \let\alignmentcharacter\m_temp
+   \fi
    \setcharacteralign\plusone\alignmentcharacter
    \begingroup
      \scratchcounter\zerocount
      \let\checkcharacteralign\typo_charalign_pass_one
      \settrialtypesetting
-     #1\relax
+     #2\relax
    \endgroup
    \begingroup
      \scratchcounter\zerocount
      \let\checkcharacteralign\typo_charalign_pass_two
-     #1\relax
+     #2\relax
    \endgroup
    \resetcharacteralign
    \egroup}
diff --git a/tex/context/base/util-prs.lua b/tex/context/base/util-prs.lua
index 2cede919b..f51f6fc75 100644
--- a/tex/context/base/util-prs.lua
+++ b/tex/context/base/util-prs.lua
@@ -542,8 +542,8 @@ end
 
 --
 
-local pattern_math = Cs((P("%")/"\\percent " +  P("^")           * Cc("{") * lpegpatterns.integer * Cc("}") + P(1))^0)
-local pattern_text = Cs((P("%")/"\\percent " + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + P(1))^0)
+local pattern_math = Cs((P("%")/"\\percent " +  P("^")           * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)
+local pattern_text = Cs((P("%")/"\\percent " + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)
 
 patterns.unittotex = pattern
 
@@ -551,7 +551,7 @@ function parsers.unittotex(str,textmode)
     return lpegmatch(textmode and pattern_text or pattern_math,str)
 end
 
-local pattern = Cs((P("^") / "<sup>" * lpegpatterns.integer * Cc("</sup>") + P(1))^0)
+local pattern = Cs((P("^") / "<sup>" * lpegpatterns.integer * Cc("</sup>") + anything)^0)
 
 function parsers.unittoxml(str)
     return lpegmatch(pattern,str)
@@ -648,3 +648,27 @@ function utilities.parsers.runtime(time)
     local seconds = mod(time,60)
     return days, hours, minutes, seconds
 end
+
+--
+
+local spacing = whitespace^0
+local apply   = P("->")
+local method  = C((1-apply)^1)
+local token   = lbrace * C((1-rbrace)^1) * rbrace + C(anything^1)
+
+local pattern = spacing * (method * spacing * apply + Carg(1)) * spacing * token
+
+function utilities.parsers.splitmethod(str,default)
+    if str then
+        return lpegmatch(pattern,str,1,default or false)
+    else
+        return default or false, ""
+    end
+end
+
+-- print(utilities.parsers.splitmethod(" foo -> {bar} "))
+-- print(utilities.parsers.splitmethod("foo->{bar}"))
+-- print(utilities.parsers.splitmethod("foo->bar"))
+-- print(utilities.parsers.splitmethod("foo"))
+-- print(utilities.parsers.splitmethod("{foo}"))
+-- print(utilities.parsers.splitmethod())
diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua
index 6e0870dfe..5f509be8a 100644
--- a/tex/generic/context/luatex/luatex-fonts-merged.lua
+++ b/tex/generic/context/luatex/luatex-fonts-merged.lua
@@ -1,6 +1,6 @@
 -- merged file : luatex-fonts-merged.lua
 -- parent file : luatex-fonts.lua
--- merge date  : 06/26/14 12:08:01
+-- merge date  : 06/27/14 10:53:59
 
 do -- begin closure to overcome local limits and interference
author	Context Git Mirror Bot <phg42.2a@gmail.com>	2014-06-27 11:15:04 +0200
committer	Context Git Mirror Bot <phg42.2a@gmail.com>	2014-06-27 11:15:04 +0200
commit	8e8dd7540025b45c0fcb5687bcfeb12928b74426 (patch)
tree	5651f85e59efdf2c9c890bb1d51a6b5c9058e86b /tex
parent	6d09de5c379629ea7259803ae28185949cda27c8 (diff)
download	context-8e8dd7540025b45c0fcb5687bcfeb12928b74426.tar.gz