From 815a031881f613340bf8a9a4f721d35642218637 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Fri, 6 May 2011 16:52:00 +0200 Subject: beta 2011.05.06 16:52 --- tex/context/base/back-exp.lua | 5 +- tex/context/base/back-exp.mkiv | 11 +- tex/context/base/char-ini.lua | 57 +++----- tex/context/base/char-utf.lua | 219 +++++++++++++++++++++++++++- tex/context/base/char-utf.mkiv | 11 +- tex/context/base/status-files.pdf | Bin 23530 -> 23543 bytes tex/context/base/status-lua.pdf | Bin 154266 -> 154301 bytes tex/generic/context/luatex-fonts-merged.lua | 2 +- 8 files changed, 256 insertions(+), 49 deletions(-) diff --git a/tex/context/base/back-exp.lua b/tex/context/base/back-exp.lua index 5ceb360dc..46ddc4f53 100644 --- a/tex/context/base/back-exp.lua +++ b/tex/context/base/back-exp.lua @@ -1066,7 +1066,7 @@ local function stopexport(v) if handle then report_export("saving xml data in '%s",xmlfile) handle:write(format(xmlpreamble,tex.jobname,os.date(),environment.version,version)) - if cssfile then + if type(cssfile) == "string" then local cssfiles = settings_to_array(cssfile) for i=1,#cssfiles do local cssfile = cssfiles[i] @@ -1099,7 +1099,8 @@ local function stopexport(v) io.savedata(cssfile,concat(templates,"\n\n")) -- xhtml references if xhtmlfile then - if type(v) ~= "string" or xhtmlfile == variables.yes or xhtmlfile == "" or xhtmlfile == xmlfile then + -- messy + if type(v) ~= "string" or xhtmlfile == true or xhtmlfile == variables.yes or xhtmlfile == "" or xhtmlfile == xmlfile then xhtmlfile = file.replacesuffix(xmlfile,"xhtml") else xhtmlfile = file.addsuffix(xhtmlfile,"xhtml") diff --git a/tex/context/base/back-exp.mkiv b/tex/context/base/back-exp.mkiv index 4682e8047..88272fca7 100644 --- a/tex/context/base/back-exp.mkiv +++ b/tex/context/base/back-exp.mkiv @@ -109,14 +109,19 @@ \def\c!css {css} \def\c!xhtml {xhtml} +\appendtoks + \doifsomething{\backendparameter\c!xhtml} + {\enabledirectives[backend.export.xhtml=\backendparameter\c!xhtml]}% + \doifsomething{\backendparameter\c!css} + {\enabledirectives[backend.export.css={\backendparameter\c!css}]}% +\to \everysetupbackend + \appendtoks \doifsomething{\backendparameter\c!export} {\setupstructure [\c!state=\v!start]% \enabledirectives - [backend.export=\backendparameter\c!export,% - backend.export.xhtml=\backendparameter\c!xhtml,% - backend.export.css={\backendparameter\c!css}]}% + [backend.export=\backendparameter\c!export]}% \to \everysetupbackend \protect \endinput diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua index 7f8c2db2f..c85bb3f49 100644 --- a/tex/context/base/char-ini.lua +++ b/tex/context/base/char-ini.lua @@ -386,10 +386,15 @@ local is_spacing = allocate ( table.tohash { "zs", "zl","zp", } ) +local is_mark = allocate ( table.tohash { + "mn", "ms", +} ) + characters.is_character = is_character characters.is_letter = is_letter characters.is_command = is_command characters.is_spacing = is_spacing +characters.is_mark = is_mark local mt = { -- yes or no ? __index = function(t,k) @@ -511,17 +516,6 @@ function characters.define(tobelettered, tobeactivated) -- catcodetables local contextname = chr.contextname if contextname then local category = chr.category ---~ if is_character[category] then ---~ if chr.unicodeslot < 128 then ---~ texprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) ---~ else ---~ texprint(ctxcatcodes,format("\\let\\%s=%s",contextname,utfchar(u))) ---~ end ---~ elseif is_command[category] then ---~ texsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}") -- no texprint ---~ a = a + 1 ---~ activated[a] = u ---~ end if is_character[category] then if chr.unicodeslot < 128 then if is_letter[category] then @@ -608,35 +602,32 @@ function characters.setcodes() report_defining("defining lc and uc codes") end for code, chr in next, data do - local cc = chr.category -- mn lo - if cc == 'll' or cc == 'lu' or cc == 'lt' then - local lc, uc = chr.lccode, chr.uccode - if not lc then chr.lccode, lc = code, code end - if not uc then chr.uccode, uc = code, code end - texsetcatcode(code,11) -- letter - if type(lc) == "table" then - lc = code - end - if type(uc) == "table" then - uc = code - end - texsetlccode(code,lc,uc) - if cc == "lu" then - texsetsfcode(code,999) - end - elseif cc == "lo" then + local cc = chr.category + if is_letter[cc] then local range = chr.range if range then for i=range.first,range.last do texsetcatcode(i,11) -- letter texsetlccode(i,i,i) -- self self end - else -- letter - texsetcatcode(code,11) - texsetlccode(code,code,code) + else + local lc, uc = chr.lccode, chr.uccode + if not lc then chr.lccode, lc = code, code end + if not uc then chr.uccode, uc = code, code end + texsetcatcode(code,11) -- letter + if type(lc) == "table" then + lc = code + end + if type(uc) == "table" then + uc = code + end + texsetlccode(code,lc,uc) + if cc == "lu" then + texsetsfcode(code,999) + end end - elseif cc == "mn" then -- mark - texsetlccode(code,code,code) + elseif is_mark[cc] then + texsetlccode(code,code,code) -- for hyphenation end end end diff --git a/tex/context/base/char-utf.lua b/tex/context/base/char-utf.lua index c509231e3..30124a6a2 100644 --- a/tex/context/base/char-utf.lua +++ b/tex/context/base/char-utf.lua @@ -20,7 +20,7 @@ over a string.

--ldx]]-- local utfchar, utfbyte, utfgsub = utf.char, utf.byte, utf.gsub -local concat, gmatch, gsub = table.concat, string.gmatch, string.gsub +local concat, gmatch, gsub, find = table.concat, string.gmatch, string.gsub, string.find local utfcharacters, utfvalues = string.utfcharacters, string.utfvalues local allocate = utilities.storage.allocate @@ -34,6 +34,9 @@ local characters = characters characters.graphemes = allocate() local graphemes = characters.graphemes +characters.decomposed = allocate() +local decomposed = characters.decomposed + characters.mathpairs = allocate() local mathpairs = characters.mathpairs @@ -48,13 +51,34 @@ local utffilters = characters.filters.utf source code to depend on collapsing.

--ldx]]-- +-- for the moment, will be entries in char-def.lua + +local decomposed = allocate { + ["IJ"] = "IJ", + ["ij"] = "ij", + ["և"] = "եւ", + ["ff"] = "ff", + ["fi"] = "fi", + ["fl"] = "fl", + ["ffi"] = "ffi", + ["ffl"] = "ffl", + ["ſt"] = "ſt", + ["st"] = "st", + ["ﬓ"] = "մն", + ["ﬔ"] = "մե", + ["ﬕ"] = "մի", + ["ﬖ"] = "վն", + ["ﬗ"] = "մխ", +} +characters.decomposed = decomposed + local function initialize() - for k,v in next, characters.data do + for unicode, v in next, characters.data do -- using vs and first testing for length is faster (.02->.01 s) local vs = v.specials if vs and #vs == 3 and vs[1] == 'char' then local one, two = vs[2], vs[3] - local first, second, combined = utfchar(one), utfchar(two), utfchar(k) + local first, second, combined = utfchar(one), utfchar(two), utfchar(unicode) local cgf = graphemes[first] if not cgf then cgf = { } @@ -67,7 +91,7 @@ local function initialize() mps = { } mathpairs[two] = mps end - mps[one] = k + mps[one] = unicode -- here unicode local mps = mathpairs[second] if not mps then mps = { } @@ -75,6 +99,26 @@ local function initialize() end mps[first] = combined end + -- else + -- local description = v.description + -- if find(description,"LIGATURE") then + -- if vs then + -- local t = { } + -- for i=2,#vs do + -- t[#t+1] = utfchar(vs[i]) + -- end + -- decomposed[utfchar(unicode)] = concat(t) + -- else + -- local vs = v.shcode + -- if vs then + -- local t = { } + -- for i=1,#vs do + -- t[i] = utfchar(vs[i]) + -- end + -- decomposed[utfchar(unicode)] = concat(t) + -- end + -- end + -- end end end initialize = false @@ -164,6 +208,113 @@ not collecting tokens is not only faster but also saves garbage collecting. --ldx]]-- -- lpeg variant is not faster +-- +-- I might use the combined loop at some point for the filter +-- some day. + +--~ function utffilters.collapse(str) -- not really tested (we could preallocate a table) +--~ if str and str ~= "" then +--~ local nstr = #str +--~ if nstr > 1 then +--~ if initialize then -- saves a call +--~ initialize() +--~ end +--~ local tokens, t, first, done, n = { }, 0, false, false, 0 +--~ for second in utfcharacters(str) do +--~ local dec = decomposed[second] +--~ if dec then +--~ if not done then +--~ if n > 0 then +--~ for s in utfcharacters(str) do +--~ if n == 1 then +--~ break +--~ else +--~ t = t + 1 +--~ tokens[t] = s +--~ n = n - 1 +--~ end +--~ end +--~ end +--~ done = true +--~ elseif first then +--~ t = t + 1 +--~ tokens[t] = first +--~ end +--~ t = t + 1 +--~ tokens[t] = dec +--~ first = false +--~ elseif done then +--~ local crs = high[second] +--~ if crs then +--~ if first then +--~ t = t + 1 +--~ tokens[t] = first +--~ end +--~ first = crs +--~ else +--~ local cgf = graphemes[first] +--~ if cgf and cgf[second] then +--~ first = cgf[second] +--~ elseif first then +--~ t = t + 1 +--~ tokens[t] = first +--~ first = second +--~ else +--~ first = second +--~ end +--~ end +--~ else +--~ local crs = high[second] +--~ if crs then +--~ for s in utfcharacters(str) do +--~ if n == 1 then +--~ break +--~ else +--~ t = t + 1 +--~ tokens[t] = s +--~ n = n - 1 +--~ end +--~ end +--~ if first then +--~ t = t + 1 +--~ tokens[t] = first +--~ end +--~ first = crs +--~ done = true +--~ else +--~ local cgf = graphemes[first] +--~ if cgf and cgf[second] then +--~ for s in utfcharacters(str) do +--~ if n == 1 then +--~ break +--~ else +--~ t = t + 1 +--~ tokens[t] = s +--~ n = n - 1 +--~ end +--~ end +--~ first = cgf[second] +--~ done = true +--~ else +--~ first = second +--~ n = n + 1 +--~ end +--~ end +--~ end +--~ end +--~ if done then +--~ if first then +--~ t = t + 1 +--~ tokens[t] = first +--~ end +--~ return concat(tokens) -- seldom called +--~ end +--~ elseif nstr > 0 then +--~ return high[str] or str +--~ end +--~ end +--~ return str +--~ end function utffilters.collapse(str) -- not really tested (we could preallocate a table) if str and str ~= "" then @@ -203,7 +354,7 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t else t = t + 1 tokens[t] = s - n = n -1 + n = n - 1 end end if first then @@ -221,7 +372,7 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t else t = t + 1 tokens[t] = s - n = n -1 + n = n - 1 end end first = cgf[second] @@ -234,8 +385,10 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t end end if done then - t = t + 1 - tokens[t] = first + if first then + t = t + 1 + tokens[t] = first + end return concat(tokens) -- seldom called end elseif nstr > 0 then @@ -245,11 +398,61 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t return str end +function utffilters.decompose(str) + if str and str ~= "" then + local nstr = #str + if nstr > 1 then + -- if initialize then -- saves a call + -- initialize() + -- end + local tokens, t, done, n = { }, 0, false, 0 + for s in utfcharacters(str) do + local dec = decomposed[s] + if dec then + if not done then + if n > 0 then + for s in utfcharacters(str) do + if n == 1 then + break + else + t = t + 1 + tokens[t] = s + n = n - 1 + end + end + end + done = true + end + t = t + 1 + tokens[t] = dec + elseif done then + t = t + 1 + tokens[t] = s + else + n = n + 1 + end + end + if done then + return concat(tokens) -- seldom called + end + end + end + return str +end + local textfileactions = resolvers.openers.helpers.textfileactions utilities.sequencers.appendaction (textfileactions,"system","characters.filters.utf.collapse") utilities.sequencers.disableaction(textfileactions,"characters.filters.utf.collapse") +utilities.sequencers.appendaction (textfileactions,"system","characters.filters.utf.decompose") +utilities.sequencers.disableaction(textfileactions,"characters.filters.utf.decompose") + +function characters.filters.utf.enable() + utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.collapse") + utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.decompose") +end + --[[ldx--

Next we implement some commands that are used in the user interface.

--ldx]]-- diff --git a/tex/context/base/char-utf.mkiv b/tex/context/base/char-utf.mkiv index b59d2f569..261735656 100644 --- a/tex/context/base/char-utf.mkiv +++ b/tex/context/base/char-utf.mkiv @@ -30,9 +30,16 @@ % resolvers.filters.install('utf',characters.filters.utf.collapse) +% \appendtoks +% \ctxlua{ +% local textfileactions = resolvers.openers.helpers.textfileactions +% utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.collapse") +% utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.decompose") +% }% +% \to \everyjob + \appendtoks - \ctxlua{utilities.sequencers.enableaction - (resolvers.openers.textfileactions,"characters.filters.utf.collapse")}% + \ctxlua{characters.filters.utf.enable()}% \to \everyjob %D The next one influences input parsing. diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf index 30de4263a..5e38335cf 100644 Binary files a/tex/context/base/status-files.pdf and b/tex/context/base/status-files.pdf differ diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf index b6a7cce99..c34365ed2 100644 Binary files a/tex/context/base/status-lua.pdf and b/tex/context/base/status-lua.pdf differ diff --git a/tex/generic/context/luatex-fonts-merged.lua b/tex/generic/context/luatex-fonts-merged.lua index 11fc6e347..f6aeb6327 100644 --- a/tex/generic/context/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 05/05/11 12:10:55 +-- merge date : 05/06/11 16:52:12 do -- begin closure to overcome local limits and interference -- cgit v1.2.3