From 8e8dd7540025b45c0fcb5687bcfeb12928b74426 Mon Sep 17 00:00:00 2001 From: Context Git Mirror Bot Date: Fri, 27 Jun 2014 11:15:04 +0200 Subject: 2014-06-27 10:56:00 --- tex/context/base/char-utf.lua | 76 ++++++-- tex/context/base/cont-new.mkiv | 2 +- tex/context/base/context-version.pdf | Bin 4348 -> 4354 bytes tex/context/base/context.mkiv | 2 +- tex/context/base/status-files.pdf | Bin 24873 -> 24880 bytes tex/context/base/status-lua.pdf | Bin 248293 -> 248312 bytes tex/context/base/typo-tal.lua | 191 ++++++++++++++------- tex/context/base/typo-tal.mkiv | 23 ++- tex/context/base/util-prs.lua | 30 +++- tex/generic/context/luatex/luatex-fonts-merged.lua | 2 +- 10 files changed, 233 insertions(+), 93 deletions(-) (limited to 'tex') diff --git a/tex/context/base/char-utf.lua b/tex/context/base/char-utf.lua index 46235c4e9..98a780dcd 100644 --- a/tex/context/base/char-utf.lua +++ b/tex/context/base/char-utf.lua @@ -547,23 +547,29 @@ end local p_reorder = nil -local sorter = function(a,b) return b[2] < a[2] end +-- local sorter = function(a,b) return b[2] < a[2] end +-- +-- local function swapper(s,p,t) +-- local old = { } +-- for i=1,#t do +-- old[i] = t[i][1] +-- end +-- old = concat(old) +-- sort(t,sorter) +-- for i=1,#t do +-- t[i] = t[i][1] +-- end +-- local new = concat(t) +-- if old ~= new then +-- print("reordered",old,"->",new) +-- end +-- return p, new +-- end -local function swapper(s,p,t) - local old = { } - for i=1,#t do - old[i] = t[i][1] - end - old = concat(old) - sort(t,sorter) - for i=1,#t do - t[i] = t[i][1] - end - local new = concat(t) - if old ~= new then - print("reordered",old,"->",new) - end - return p, new +-- -- the next one is not stable for similar weights + +local sorter = function(a,b) + return b[2] < a[2] end local function swapper(s,p,t) @@ -574,16 +580,48 @@ local function swapper(s,p,t) return p, concat(t) end +-- -- the next one keeps similar weights in the original order +-- +-- local sorter = function(a,b) +-- local b2, a2 = b[2], 
a[2] +-- if a2 == b2 then +-- return b[3] > a[3] +-- else +-- return b2 < a2 +-- end +-- end +-- +-- local function swapper(s,p,t) +-- for i=1,#t do +-- t[i][3] = i +-- end +-- sort(t,sorter) +-- for i=1,#t do +-- t[i] = t[i][1] +-- end +-- return p, concat(t) +-- end + +-- at some point exceptions will become an option, for now it's an experiment +-- to overcome bugs (that have become features) in unicode .. or we might decide +-- for an extra ordering key in char-def that takes precedence over combining + +local exceptions = { + -- frozen unicode bug + ["َّ"] = "َّ", -- U+64E .. U+651 => U+651 .. U+64E +} + local function prepare() local hash = { } for k, v in sortedhash(characters.data) do - local combining = v.combining + local combining = v.combining -- v.ordering or v.combining if combining then - hash[utfchar(k)] = { utfchar(k), combining } + hash[utfchar(k)] = { utfchar(k), combining, 0 } -- slot 3 can be used in sort end end + local e = utfchartabletopattern(keys(exceptions)) local p = utfchartabletopattern(keys(hash)) - p_reorder = Cs((Cmt(Ct((p/hash)^2),swapper) + p_utf8character)^0) * P(-1) + p_reorder = Cs((e/exceptions + Cmt(Ct((p/hash)^2),swapper) + p_utf8character)^0) * P(-1) end function utffilters.reorder(str) diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv index 0d7fa9a0b..53a57ef9c 100644 --- a/tex/context/base/cont-new.mkiv +++ b/tex/context/base/cont-new.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2014.06.26 12:08} +\newcontextversion{2014.06.27 10:53} %D This file is loaded at runtime, thereby providing an excellent place for %D hacks, patches, extensions and new features. 
diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf index ba240100d..303d0d600 100644 Binary files a/tex/context/base/context-version.pdf and b/tex/context/base/context-version.pdf differ diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv index 735023acf..b936c288d 100644 --- a/tex/context/base/context.mkiv +++ b/tex/context/base/context.mkiv @@ -28,7 +28,7 @@ %D up and the dependencies are more consistent. \edef\contextformat {\jobname} -\edef\contextversion{2014.06.26 12:08} +\edef\contextversion{2014.06.27 10:53} \edef\contextkind {beta} %D For those who want to use this: diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf index 8f4df1cf6..8eee56299 100644 Binary files a/tex/context/base/status-files.pdf and b/tex/context/base/status-files.pdf differ diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf index 775d7b49d..7541cdaba 100644 Binary files a/tex/context/base/status-lua.pdf and b/tex/context/base/status-lua.pdf differ diff --git a/tex/context/base/typo-tal.lua b/tex/context/base/typo-tal.lua index eb50fdda7..5d62d4e47 100644 --- a/tex/context/base/typo-tal.lua +++ b/tex/context/base/typo-tal.lua @@ -8,11 +8,16 @@ if not modules then modules = { } end modules ['typo-tal'] = { -- I'll make it a bit more efficient and provide named instances too which is needed for -- nested tables. +-- +-- Currently we have two methods: text and number with some downward compatible +-- defaulting. 
local next, type = next, type local div = math.div local utfbyte = utf.byte +local splitmethod = utilities.parsers.splitmethod + local nodecodes = nodes.nodecodes local glyph_code = nodecodes.glyph local glue_code = nodecodes.glue @@ -21,6 +26,10 @@ local fontcharacters = fonts.hashes.characters local unicodes = fonts.hashes.unicodes local categories = characters.categories -- nd +local variables = interfaces.variables +local v_text = variables.text +local v_number = variables.number + local nuts = nodes.nuts local tonut = nuts.tonut local tonode = nuts.tonode @@ -118,82 +127,123 @@ function characteralign.handler(originalhead,where) -- local validseparators = dataset.separators local validsigns = dataset.signs + local method = dataset.method -- we can think of constraints - while current do - local id = getid(current) - if id == glyph_code then - local char = getchar(current) - local font = getfont(current) - local unicode = unicodes[font][char] - if not unicode then - -- no unicode so forget about it - elseif unicode == separator then - c = current - if trace_split then - setcolor(current,"darkred") - end - dataset.hasseparator = true - elseif categories[unicode] == "nd" or validseparators[unicode] then - if c then - if not a_start then - a_start = current - end - a_stop = current + if method == v_number then + while current do + local id = getid(current) + if id == glyph_code then + local char = getchar(current) + local font = getfont(current) + local unicode = unicodes[font][char] + if not unicode then + -- no unicode so forget about it + elseif unicode == separator then + c = current if trace_split then - setcolor(current,validseparators[unicode] and "darkcyan" or "darkblue") + setcolor(current,"darkred") end - else - if not b_start then - if sign then - b_start = sign - local new = validsigns[getchar(sign)] - if char == new or not fontcharacters[getfont(sign)][new] then - if trace_split then - setcolor(sign,"darkyellow") + dataset.hasseparator = true + elseif 
categories[unicode] == "nd" or validseparators[unicode] then + if c then + if not a_start then + a_start = current + end + a_stop = current + if trace_split then + setcolor(current,validseparators[unicode] and "darkcyan" or "darkblue") + end + else + if not b_start then + if sign then + b_start = sign + local new = validsigns[getchar(sign)] + if char == new or not fontcharacters[getfont(sign)][new] then + if trace_split then + setcolor(sign,"darkyellow") + end + else + setfield(sign,"char",new) + if trace_split then + setcolor(sign,"darkmagenta") + end end + sign = nil + b_stop = current else - setfield(sign,"char",new) - if trace_split then - setcolor(sign,"darkmagenta") - end + b_start = current + b_stop = current end - sign = nil - b_stop = current else - b_start = current b_stop = current end - else - b_stop = current + if trace_split and current ~= sign then + setcolor(current,validseparators[unicode] and "darkcyan" or "darkblue") + end end - if trace_split and current ~= sign then - setcolor(current,validseparators[unicode] and "darkcyan" or "darkblue") + elseif not b_start then + sign = validsigns[unicode] and current + -- if trace_split then + -- setcolor(current,"darkgreen") + -- end + end + elseif (b_start or a_start) and id == glue_code then + -- maybe only in number mode + -- somewhat inefficient + local next = getnext(current) + local prev = getprev(current) + if next and prev and getid(next) == glyph_code and getid(prev) == glyph_code then -- too much checking + local width = fontcharacters[getfont(b_start)][separator or period].width + -- local spec = getfield(current,"spec") + -- free_spec(spec) + setfield(current,"spec",new_gluespec(width)) + setattr(current,a_character,punctuationspace) + if a_start then + a_stop = current + elseif b_start then + b_stop = current end end - elseif not b_start then - sign = validsigns[unicode] and current - -- if trace_split then - -- setcolor(current,"darkgreen") - -- end end - elseif (b_start or a_start) and id == 
glue_code then - -- somewhat inefficient - local next = getnext(current) - local prev = getprev(current) - if next and prev and getid(next) == glyph_code and getid(prev) == glyph_code then -- too much checking - local width = fontcharacters[getfont(b_start)][separator or period].width - -- local spec = getfield(current,"spec") - -- free_spec(spec) - setfield(current,"spec",new_gluespec(width)) - setattr(current,a_character,punctuationspace) - if a_start then - a_stop = current - elseif b_start then - b_stop = current + current = getnext(current) + end + else + while current do + local id = getid(current) + if id == glyph_code then + local char = getchar(current) + local font = getfont(current) + local unicode = unicodes[font][char] + if not unicode then + -- no unicode so forget about it + elseif unicode == separator then + c = current + if trace_split then + setcolor(current,"darkred") + end + dataset.hasseparator = true + else + if c then + if not a_start then + a_start = current + end + a_stop = current + if trace_split then + setcolor(current,"darkgreen") + end + else + if not b_start then + b_start = current + end + b_stop = current + if trace_split then + setcolor(current,"darkblue") + end + end end end + current = getnext(current) end - current = getnext(current) end local entry = list[row] if entry then @@ -287,17 +337,28 @@ function setcharacteralign(column,separator) end local dataset = datasets[column] -- we can use a metatable if not dataset then - separator = separator and utfbyte(separator) or comma - local auto = validseparators[separator] + local method, token + if separator then + method, token = splitmethod(separator) + if method and token then + separator = utfbyte(token) or comma + else + separator = utfbyte(separator) or comma + method = validseparators[separator] and v_number or v_text + end + else + separator = comma + method = v_number + end dataset = { separator = separator, list = { }, maxafter = 0, maxbefore = 0, collected = false, - mode 
= auto and "numeric", - separators = auto and validseparators or { [separator] = true }, - signs = auto and validsigns or { }, + method = method, + separators = validseparators, + signs = validsigns, } datasets[column] = dataset used = true diff --git a/tex/context/base/typo-tal.mkiv b/tex/context/base/typo-tal.mkiv index 11a5e381f..126233b1a 100644 --- a/tex/context/base/typo-tal.mkiv +++ b/tex/context/base/typo-tal.mkiv @@ -59,6 +59,7 @@ \unexpanded\def\signalcharacteralign#1#2{\attribute\characteralignattribute=\numexpr#1*\plushundred+#2\relax} \unexpanded\def\setcharacteralign #1#2{\ctxcommand{setcharacteralign(\number#1,"#2")}} \unexpanded\def\resetcharacteralign {\ctxcommand{resetcharacteralign()}} +\unexpanded\def\nocharacteralign {\attribute\characteralignattribute\attributeunsetvalue} %D Mostly downward compatible: %D @@ -73,6 +74,15 @@ %D %D \typebuffer \blank \getbuffer \blank +%D We have (currently) two modes: \type {text} and \type {number}. The handler tries +%D to determine the mode automatically. When using periods and commas as separators +%D the \type {number} mode is chosen. If you use for instance a \type {-} as +%D separator, \type {text} is chosen, but you can enforce \type {number} with \type +%D {number->-} (as with other mechanisms, the arrow indicates a method to apply). +%D +%D One can use \type {\nocharacteralign} to disable this mechanism, for instance in +%D a table cell. 
+ \def\alignmentcharacter{,} \unexpanded\def\typo_charalign_pass_one @@ -86,19 +96,26 @@ \def\typo_charalign_pass {\hbox\bgroup\signalcharacteralign\plusone\scratchcounter\let\next} -\unexpanded\def\startcharacteralign#1\stopcharacteralign +\unexpanded\def\startcharacteralign + {\dosingleempty\typo_charalign_start} + +\def\typo_charalign_start[#1]#2\stopcharacteralign {\bgroup + \edef\m_temp{#1}% + \ifx\m_temp\empty \else + \let\alignmentcharacter\m_temp + \fi \setcharacteralign\plusone\alignmentcharacter \begingroup \scratchcounter\zerocount \let\checkcharacteralign\typo_charalign_pass_one \settrialtypesetting - #1\relax + #2\relax \endgroup \begingroup \scratchcounter\zerocount \let\checkcharacteralign\typo_charalign_pass_two - #1\relax + #2\relax \endgroup \resetcharacteralign \egroup} diff --git a/tex/context/base/util-prs.lua b/tex/context/base/util-prs.lua index 2cede919b..f51f6fc75 100644 --- a/tex/context/base/util-prs.lua +++ b/tex/context/base/util-prs.lua @@ -542,8 +542,8 @@ end -- -local pattern_math = Cs((P("%")/"\\percent " + P("^") * Cc("{") * lpegpatterns.integer * Cc("}") + P(1))^0) -local pattern_text = Cs((P("%")/"\\percent " + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + P(1))^0) +local pattern_math = Cs((P("%")/"\\percent " + P("^") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0) +local pattern_text = Cs((P("%")/"\\percent " + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0) patterns.unittotex = pattern @@ -551,7 +551,7 @@ function parsers.unittotex(str,textmode) return lpegmatch(textmode and pattern_text or pattern_math,str) end -local pattern = Cs((P("^") / "" * lpegpatterns.integer * Cc("") + P(1))^0) +local pattern = Cs((P("^") / "" * lpegpatterns.integer * Cc("") + anything)^0) function parsers.unittoxml(str) return lpegmatch(pattern,str) @@ -648,3 +648,27 @@ function utilities.parsers.runtime(time) local seconds = mod(time,60) return days, hours, minutes, seconds end + +-- + +local 
spacing = whitespace^0 +local apply = P("->") +local method = C((1-apply)^1) +local token = lbrace * C((1-rbrace)^1) * rbrace + C(anything^1) + +local pattern = spacing * (method * spacing * apply + Carg(1)) * spacing * token + +function utilities.parsers.splitmethod(str,default) + if str then + return lpegmatch(pattern,str,1,default or false) + else + return default or false, "" + end +end + +-- print(utilities.parsers.splitmethod(" foo -> {bar} ")) +-- print(utilities.parsers.splitmethod("foo->{bar}")) +-- print(utilities.parsers.splitmethod("foo->bar")) +-- print(utilities.parsers.splitmethod("foo")) +-- print(utilities.parsers.splitmethod("{foo}")) +-- print(utilities.parsers.splitmethod()) diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 6e0870dfe..5f509be8a 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 06/26/14 12:08:01 +-- merge date : 06/27/14 10:53:59 do -- begin closure to overcome local limits and interference -- cgit v1.2.3