From ca16b82275f15170ca269f77b9dd9b0e29bbd7f6 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Sun, 1 Jun 2014 13:44:00 +0200 Subject: beta 2014.06.01 13:44 --- tex/context/base/back-exp.mkiv | 6 +- tex/context/base/buff-ver.mkiv | 11 ++- tex/context/base/char-act.mkiv | 7 +- tex/context/base/cont-new.mkiv | 2 +- tex/context/base/context-version.pdf | Bin 4321 -> 4325 bytes tex/context/base/context.mkiv | 2 +- tex/context/base/mult-low.lua | 4 +- tex/context/base/sort-ini.lua | 96 ++++++++++++++++----- tex/context/base/spac-ver.lua | 19 +++- tex/context/base/status-files.pdf | Bin 24914 -> 24844 bytes tex/context/base/status-lua.pdf | Bin 246731 -> 246733 bytes tex/context/base/strc-syn.lua | 32 +++---- tex/context/base/strc-tag.mkiv | 6 ++ tex/generic/context/luatex/luatex-fonts-merged.lua | 2 +- 14 files changed, 137 insertions(+), 50 deletions(-) (limited to 'tex') diff --git a/tex/context/base/back-exp.mkiv b/tex/context/base/back-exp.mkiv index c7696d383..7fd1b5799 100644 --- a/tex/context/base/back-exp.mkiv +++ b/tex/context/base/back-exp.mkiv @@ -113,9 +113,9 @@ \unexpanded\def\dotagsetnotesymbol{\taggedctxcommand{settagdescriptionsymbol("\currentnote",\currentnotenumber)}}% \to \everyenableelements -\appendtoks - \unexpanded\def\doverbatimspace{\char32\relax}% will be done permanently -\to \everyenableelements +% \appendtoks +% \unexpanded\def\doverbatimspace{\asciispacechar}% will be done permanently +% \to \everyenableelements % The action: \setupbackend[export=yes] % or filename diff --git a/tex/context/base/buff-ver.mkiv b/tex/context/base/buff-ver.mkiv index 10002c4a7..7a4f28253 100644 --- a/tex/context/base/buff-ver.mkiv +++ b/tex/context/base/buff-ver.mkiv @@ -376,6 +376,15 @@ \unexpanded\def\specialstretchedspace{\hskip.5\interwordspace\s!plus.125\interwordspace\relax} % \interwordstretch can be zero \unexpanded\def\specialcontrolspace {\hskip\zeropoint\hbox{\normalcontrolspace}\hskip\zeropoint\relax} +% \unexpanded\def\taggedspecialfixedspace 
{\hskip\zeropoint\asciispacechar\hskip\zeropoint} +% \unexpanded\def\taggedspecialobeyedspace {\hskip\zeropoint\asciispacechar\hskip\zeropoint} +% \unexpanded\def\taggedspecialstretchedspace{\hskip\zeropoint\asciispacechar\hskip\zeropoint} +% \unexpanded\def\taggedspecialcontrolspace {\hskip\zeropoint\hbox{\normalcontrolspace}\hskip\zeropoint\relax} + +\appendtoks + \unexpanded\def\obeyedspace{\hskip\zeropoint\asciispacechar\hskip\zeropoint}% +\to \everyenableelements + \unexpanded\def\obeyhyphens {\let\obeyedspace \specialobeyedspace % maybe \specialstretchedspace \let\controlspace\specialcontrolspace @@ -873,7 +882,7 @@ \newcount \c_buff_verbatim_current \newconditional\c_buff_optimize_linebreaks -\def\doverbatimspace {\obeyedspace} + \def\doverbatimspace {\obeyedspace} \unexpanded\def\doinlineverbatimstart {} \unexpanded\def\doinlineverbatimstop {} diff --git a/tex/context/base/char-act.mkiv b/tex/context/base/char-act.mkiv index 011c29d07..7d7268c8b 100644 --- a/tex/context/base/char-act.mkiv +++ b/tex/context/base/char-act.mkiv @@ -24,6 +24,8 @@ %D \NEWLINE\ and \NEWPAGE\ active and assigning them %D \type{\obeysomething}, but first we set some default values. +% These are expandable! 
+ \def\obeyedspace {\space} \def\obeyedtab {\obeyedspace} \def\obeyedline {\par} @@ -36,7 +38,10 @@ %D spaces (control spaces) we only have to adapt the definition %D of \type{\obeyedspace} to: -\unexpanded\def\controlspace{\hbox{\char32}} % rather tex, we need the unicode value +\chardef\asciispacechar\spaceasciicode % a real space character + +\unexpanded\def\naturalspace{\asciispacechar} +\unexpanded\def\controlspace{\hbox{\asciispacechar}} % rather tex, we need the unicode value \unexpanded\def\normalspaces{\catcode\spaceasciicode\spacecatcode} \bgroup diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv index 11d62d298..2e16f0d01 100644 --- a/tex/context/base/cont-new.mkiv +++ b/tex/context/base/cont-new.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2014.05.30 23:26} +\newcontextversion{2014.06.01 13:44} %D This file is loaded at runtime, thereby providing an excellent place for %D hacks, patches, extensions and new features. diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf index a3983ea30..2b7bdd083 100644 Binary files a/tex/context/base/context-version.pdf and b/tex/context/base/context-version.pdf differ diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv index 22a770b9d..c56ce4e55 100644 --- a/tex/context/base/context.mkiv +++ b/tex/context/base/context.mkiv @@ -28,7 +28,7 @@ %D up and the dependencies are more consistent. 
\edef\contextformat {\jobname} -\edef\contextversion{2014.05.30 23:26} +\edef\contextversion{2014.06.01 13:44} \edef\contextkind {beta} %D For those who want to use this: diff --git a/tex/context/base/mult-low.lua b/tex/context/base/mult-low.lua index 2bae5a0b0..faa1302a9 100644 --- a/tex/context/base/mult-low.lua +++ b/tex/context/base/mult-low.lua @@ -126,7 +126,7 @@ return { "twoperemspace", "threeperemspace", "fourperemspace", "fiveperemspace", "sixperemspace", "figurespace", "punctuationspace", "hairspace", "zerowidthspace", "zerowidthnonjoiner", "zerowidthjoiner", "zwnj", "zwj", - "optionalspace", + "optionalspace", "asciispacechar", }, ["helpers"] = { -- @@ -241,7 +241,7 @@ return { "removetoks", "appendtoks", "prependtoks", "appendtotoks", "prependtotoks", "to", -- "endgraf", "endpar", "everyendpar", "reseteverypar", "finishpar", "empty", "null", "space", "quad", "enspace", "nbsp", - "obeyspaces", "obeylines", "obeyedspace", "obeyedline", + "obeyspaces", "obeylines", "obeyedspace", "obeyedline", "obeyedtab", "obeyedpage", "normalspace", -- "executeifdefined", diff --git a/tex/context/base/sort-ini.lua b/tex/context/base/sort-ini.lua index 42d83188e..d1eaacd15 100644 --- a/tex/context/base/sort-ini.lua +++ b/tex/context/base/sort-ini.lua @@ -39,11 +39,18 @@ relatively easy to do.

how they map onto this mechanism. I've learned that users can come up with any demand so nothing here is frozen.

+

Todo: I ran into the Unicode Collation document and noticed that +there are some similarities (like the weights) but using that method +would still demand extra code for language specifics. One option is +to use the allkeys.txt file for the uc vectors but then we would also +use the collapsed key (sq, code is now commented). In fact, we could +just hook those into the replacer code that we run beforehand.

+

In the future index entries will become more clever, i.e. they will have language etc properties that then can be used.

]]-- -local gsub, rep, sub, sort, concat = string.gsub, string.rep, string.sub, table.sort, table.concat +local gsub, rep, sub, sort, concat, tohash, format = string.gsub, string.rep, string.sub, table.sort, table.concat, table.tohash, string.format local utfbyte, utfchar, utfcharacters, utfvalues = utf.byte, utf.char, utf.characters, utf.values local next, type, tonumber, rawget, rawset = next, type, tonumber, rawget, rawset @@ -52,6 +59,7 @@ local setmetatableindex = table.setmetatableindex local trace_tests = false trackers.register("sorters.tests", function(v) trace_tests = v end) local trace_methods = false trackers.register("sorters.methods", function(v) trace_methods = v end) +local trace_orders = false trackers.register("sorters.orders", function(v) trace_orders = v end) local report_sorters = logs.reporter("languages","sorters") @@ -65,7 +73,9 @@ local digitsoffset = 0x20000 -- frozen local digitsmaximum = 0xFFFFF -- frozen local lccodes = characters.lccodes +local uccodes = characters.uccodes local lcchars = characters.lcchars +local ucchars = characters.ucchars local shchars = characters.shchars local fscodes = characters.fscodes local fschars = characters.fschars @@ -81,7 +91,7 @@ local v_after = variables.after local v_first = variables.first local v_last = variables.last -local validmethods = table.tohash { +local validmethods = tohash { "ch", -- raw character (for tracing) "mm", -- minus mapping "zm", -- zero mapping @@ -169,12 +179,12 @@ local function preparetables(data) __index = function(t,k) local n, nn if k then - if trace_tests then + if trace_orders then report_sorters("simplifing character %C",k) end local l = lower[k] or lcchars[k] if l then - if trace_tests then + if trace_orders then report_sorters(" 1 lower: %C",l) end local ml = rawget(t,l) @@ -185,7 +195,7 @@ local function preparetables(data) nn = nn + 1 n[nn] = ml[i] + (t.__delta or 0) end - if trace_tests then + if trace_orders then report_sorters(" 2 order: % t",n) end end @@ 
-193,7 +203,7 @@ local function preparetables(data) if not n then local s = shchars[k] -- maybe all components? if s and s ~= k then - if trace_tests then + if trace_orders then report_sorters(" 3 shape: %C",s) end n = { } @@ -201,7 +211,7 @@ local function preparetables(data) for l in utfcharacters(s) do local ml = rawget(t,l) if ml then - if trace_tests then + if trace_orders then report_sorters(" 4 keep: %C",l) end if ml then @@ -213,7 +223,7 @@ local function preparetables(data) else l = lower[l] or lcchars[l] if l then - if trace_tests then + if trace_orders then report_sorters(" 5 lower: %C",l) end local ml = rawget(t,l) @@ -232,7 +242,7 @@ local function preparetables(data) -- -- s = fschars[k] -- if s and s ~= k then - -- if trace_tests then + -- if trace_orders then -- report_sorters(" 6 split: %s",s) -- end -- local ml = rawget(t,s) @@ -247,24 +257,24 @@ local function preparetables(data) -- end local b = utfbyte(k) n = decomposed[b] or { b } - if trace_tests then + if trace_orders then report_sorters(" 6 split: %s",utf.tostring(b)) -- todo end end if n then - if trace_tests then + if trace_orders then report_sorters(" 7 order: % t",n) end else n = noorder - if trace_tests then + if trace_orders then report_sorters(" 8 order: 0") end end end else n = noorder - if trace_tests then + if trace_orders then report_sorters(" 9 order: 0") end end @@ -334,8 +344,8 @@ local function setlanguage(l,m,d,u) report_sorters("invalid sorter method %a in %a",s,method) end end + usedinsequence = tohash(sequence) data.sequence = sequence - usedinsequence = table.tohash(sequence) data.usedinsequence = usedinsequence -- usedinsequence.ch = true -- better just store the string if trace_tests then @@ -387,7 +397,6 @@ local function basic(a,b) -- trace ea and eb for j=1,#sequence do local m = sequence[j] result = basicsort(ea[m],eb[m]) --- print(m,result) if result ~= 0 then return result end @@ -439,6 +448,36 @@ local function basic(a,b) -- trace ea and eb end end +-- if we use 
sq: +-- +-- local function basic(a,b) -- trace ea and eb +-- local ea, eb = a.split, b.split +-- local na, nb = #ea, #eb +-- if na == 0 and nb == 0 then +-- -- simple variant (single word) +-- return basicsort(ea.sq,eb.sq) +-- else +-- -- complex variant, used in register (multiple words) +-- local result = 0 +-- for i=1,nb < na and nb or na do +-- local eai, ebi = ea[i], eb[i] +-- result = basicsort(ea.sq,eb.sq) +-- if result ~= 0 then +-- return result +-- end +-- end +-- if result ~= 0 then +-- return result +-- elseif na > nb then +-- return 1 +-- elseif nb > na then +-- return -1 +-- else +-- return 0 +-- end +-- end +-- end + comparers.basic = basic function sorters.basicsorter(a,b) @@ -531,10 +570,15 @@ function splitters.utf(str,checked) -- we could append m and u but this is clean else n = n + 1 local l = lower[sc] - l = l and utfbyte(l) or lccodes[b] + l = l and utfbyte(l) or lccodes[b] or b + -- local u = upper[sc] + -- u = u and utfbyte(u) or uccodes[b] or b if type(l) == "table" then l = l[1] -- there are currently no tables in lccodes but it can be some, day end + -- if type(u) == "table" then + -- u = u[1] -- there are currently no tables in lccodes but it can be some, day + -- end z_case[n] = l if l ~= b then m_case[n] = l - 1 @@ -593,9 +637,9 @@ function splitters.utf(str,checked) -- we could append m and u but this is clean -- p_mapping = { p_mappings[fs][1] } -- end -- end - + local result if checked then - return { + result = { ch = trace_tests and char or nil, -- not in sequence uc = usedinsequence.uc and byte or nil, mc = usedinsequence.mc and m_case or nil, @@ -606,7 +650,7 @@ function splitters.utf(str,checked) -- we could append m and u but this is clean pm = usedinsequence.pm and p_mapping or nil, } else - return { + result = { ch = char, uc = byte, mc = m_case, @@ -617,7 +661,15 @@ function splitters.utf(str,checked) -- we could append m and u but this is clean pm = p_mapping, } end - + -- local sq, n = { }, 0 + -- for i=1,#byte do + -- 
for s=1,#sequence do + -- n = n + 1 + -- sq[n] = result[sequence[s]][i] + -- end + -- end + -- result.sq = sq + return result end local function packch(entry) @@ -648,11 +700,11 @@ local function packuc(entry) if #split > 0 then -- useless test local t = { } for i=1,#split do - t[i] = concat(split[i].uc, " ") + t[i] = concat(split[i].uc, " ") -- sq end return concat(t," + ") else - return concat(split.uc," ") + return concat(split.uc," ") -- sq end end diff --git a/tex/context/base/spac-ver.lua b/tex/context/base/spac-ver.lua index 018881663..55c135cf6 100644 --- a/tex/context/base/spac-ver.lua +++ b/tex/context/base/spac-ver.lua @@ -879,6 +879,8 @@ local special_penalty_xxx = 0 -- header don't break but also make sure that we have at least a decent -- break when we have succesive ones (often when testing) +-- todo: mark headers as such so that we can recognize them + local specialmethods = { } local specialmethod = 1 @@ -927,10 +929,21 @@ specialmethods[1] = function(start,penalty) return end elseif trace_specials then - report_specials(" context %a, higher level, continue",p) + report_specials(" context penalty %a, higher level, continue",p) + end + else + local p = getfield(current,"penalty") + if p < 10000 then + -- assume some other mechanism kicks in so we seem to have content + if trace_specials then + report_specials(" regular penalty %a, quitting",p) + end + break + else + if trace_specials then + report_specials(" regular penalty %a, continue",p) + end end - elseif trace_specials then - report_specials(" regular penalty, continue") end end current = getprev(current) diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf index 55046b375..9c73215cc 100644 Binary files a/tex/context/base/status-files.pdf and b/tex/context/base/status-files.pdf differ diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf index c1435146e..b43a62bf2 100644 Binary files a/tex/context/base/status-lua.pdf and 
b/tex/context/base/status-lua.pdf differ diff --git a/tex/context/base/strc-syn.lua b/tex/context/base/strc-syn.lua index e27974eb2..2ca428455 100644 --- a/tex/context/base/strc-syn.lua +++ b/tex/context/base/strc-syn.lua @@ -139,23 +139,26 @@ function synonyms.sort(data,options) sorters.sort(data.result,synonyms.compare) end -function synonyms.finalize(data,options) +function synonyms.finalize(data,options) -- mostly the same as registers so we will generalize it: sorters.split local result = data.result data.metadata.nofsorted = #result - local split = { } + local split, nofsplit, lasttag, done, nofdone = { }, 0, nil, nil, 0 + local firstofsplit = sorters.firstofsplit for k=1,#result do local v = result[k] local entry, tag = firstofsplit(v) - local s = split[entry] -- keeps track of change - local d - if not s then - d = { } - s = { tag = tag, data = d } - split[entry] = s - else - d = s.data + if tag ~= lasttag then + -- if trace_registers then + -- report_registers("splitting at %a",tag) + -- end + done = { } + nofdone = 0 + nofsplit = nofsplit + 1 + lasttag = tag + split[nofsplit] = { tag = tag, data = done } end - d[#d+1] = v + nofdone = nofdone + 1 + done[nofdone] = v end data.result = split end @@ -168,10 +171,9 @@ local ctx_synonymentry = context.synonymentry function synonyms.flush(data,options) local kind = data.metadata.kind -- hack, will be done better local result = data.result - local sorted = table.sortedkeys(result) - for k=1,#sorted do - local letter = sorted[k] - local sublist = result[letter] + for i=1,#result do + local sublist = result[i] + local letter = sublist.tag local data = sublist.data for d=1,#data do local entry = data[d].definition diff --git a/tex/context/base/strc-tag.mkiv b/tex/context/base/strc-tag.mkiv index 6e792fd3f..7e15be4a3 100644 --- a/tex/context/base/strc-tag.mkiv +++ b/tex/context/base/strc-tag.mkiv @@ -11,6 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. 
+% labels: no language needed % key/values and other names might change (and probably will) \writestatus{loading}{ConTeXt Structure Macros / Tags} @@ -176,6 +177,11 @@ \expandafter\strc_tags_element_stop_yes \fi} +% if mainlanguage == en we can even omit the label (default to tag) which is faster +% +% \unexpanded\def\strc_tags_element_start_yes_indeed_yes[#1][#2]% +% {\ctxcommand{starttag("#1",{label="#1",userdata=\!!bs#2\!!es})}} + \unexpanded\def\strc_tags_element_start_yes_indeed_yes[#1][#2]% {\ctxcommand{starttag("#1",{label="\dogetupsometaglabeltext{#1}",userdata=\!!bs#2\!!es})}} diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 1732a2345..e9bdd7918 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 05/30/14 23:26:41 +-- merge date : 06/01/14 13:44:02 do -- begin closure to overcome local limits and interference -- cgit v1.2.3