From 815a031881f613340bf8a9a4f721d35642218637 Mon Sep 17 00:00:00 2001
From: Hans Hagen
Date: Fri, 6 May 2011 16:52:00 +0200
Subject: beta 2011.05.06 16:52
---
tex/context/base/back-exp.lua | 5 +-
tex/context/base/back-exp.mkiv | 11 +-
tex/context/base/char-ini.lua | 57 +++-----
tex/context/base/char-utf.lua | 219 +++++++++++++++++++++++++++-
tex/context/base/char-utf.mkiv | 11 +-
tex/context/base/status-files.pdf | Bin 23530 -> 23543 bytes
tex/context/base/status-lua.pdf | Bin 154266 -> 154301 bytes
tex/generic/context/luatex-fonts-merged.lua | 2 +-
8 files changed, 256 insertions(+), 49 deletions(-)
diff --git a/tex/context/base/back-exp.lua b/tex/context/base/back-exp.lua
index 5ceb360dc..46ddc4f53 100644
--- a/tex/context/base/back-exp.lua
+++ b/tex/context/base/back-exp.lua
@@ -1066,7 +1066,7 @@ local function stopexport(v)
if handle then
report_export("saving xml data in '%s",xmlfile)
handle:write(format(xmlpreamble,tex.jobname,os.date(),environment.version,version))
- if cssfile then
+ if type(cssfile) == "string" then
local cssfiles = settings_to_array(cssfile)
for i=1,#cssfiles do
local cssfile = cssfiles[i]
@@ -1099,7 +1099,8 @@ local function stopexport(v)
io.savedata(cssfile,concat(templates,"\n\n"))
-- xhtml references
if xhtmlfile then
- if type(v) ~= "string" or xhtmlfile == variables.yes or xhtmlfile == "" or xhtmlfile == xmlfile then
+ -- messy
+ if type(v) ~= "string" or xhtmlfile == true or xhtmlfile == variables.yes or xhtmlfile == "" or xhtmlfile == xmlfile then
xhtmlfile = file.replacesuffix(xmlfile,"xhtml")
else
xhtmlfile = file.addsuffix(xhtmlfile,"xhtml")
diff --git a/tex/context/base/back-exp.mkiv b/tex/context/base/back-exp.mkiv
index 4682e8047..88272fca7 100644
--- a/tex/context/base/back-exp.mkiv
+++ b/tex/context/base/back-exp.mkiv
@@ -109,14 +109,19 @@
\def\c!css {css}
\def\c!xhtml {xhtml}
+% Hooked into \everysetupbackend: pass the xhtml and css backend parameters on
+% as the directives backend.export.xhtml and backend.export.css. Each one is
+% handled on its own and is not tied to the export parameter.
+% NOTE(review): assumes \doifsomething only runs its second argument when the
+% first one expands to something non-empty -- confirm against its definition.
+\appendtoks
+ \doifsomething{\backendparameter\c!xhtml}
+ {\enabledirectives[backend.export.xhtml=\backendparameter\c!xhtml]}%
+ \doifsomething{\backendparameter\c!css}
+ {\enabledirectives[backend.export.css={\backendparameter\c!css}]}%
+\to \everysetupbackend
+
\appendtoks
\doifsomething{\backendparameter\c!export}
{\setupstructure
[\c!state=\v!start]%
\enabledirectives
- [backend.export=\backendparameter\c!export,%
- backend.export.xhtml=\backendparameter\c!xhtml,%
- backend.export.css={\backendparameter\c!css}]}%
+ [backend.export=\backendparameter\c!export]}%
\to \everysetupbackend
\protect \endinput
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua
index 7f8c2db2f..c85bb3f49 100644
--- a/tex/context/base/char-ini.lua
+++ b/tex/context/base/char-ini.lua
@@ -386,10 +386,15 @@ local is_spacing = allocate ( table.tohash {
"zs", "zl","zp",
} )
+local is_mark = allocate ( table.tohash { -- category codes that are treated as marks
+ "mn", "ms", -- NOTE(review): Unicode has the mark categories Mn, Mc and Me; "ms" is not one of them, maybe "mc" was meant -- confirm
+} )
+
characters.is_character = is_character
characters.is_letter = is_letter
characters.is_command = is_command
characters.is_spacing = is_spacing
+characters.is_mark = is_mark
local mt = { -- yes or no ?
__index = function(t,k)
@@ -511,17 +516,6 @@ function characters.define(tobelettered, tobeactivated) -- catcodetables
local contextname = chr.contextname
if contextname then
local category = chr.category
---~ if is_character[category] then
---~ if chr.unicodeslot < 128 then
---~ texprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u))
---~ else
---~ texprint(ctxcatcodes,format("\\let\\%s=%s",contextname,utfchar(u)))
---~ end
---~ elseif is_command[category] then
---~ texsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}") -- no texprint
---~ a = a + 1
---~ activated[a] = u
---~ end
if is_character[category] then
if chr.unicodeslot < 128 then
if is_letter[category] then
@@ -608,35 +602,32 @@ function characters.setcodes()
report_defining("defining lc and uc codes")
end
for code, chr in next, data do
- local cc = chr.category -- mn lo
- if cc == 'll' or cc == 'lu' or cc == 'lt' then
- local lc, uc = chr.lccode, chr.uccode
- if not lc then chr.lccode, lc = code, code end
- if not uc then chr.uccode, uc = code, code end
- texsetcatcode(code,11) -- letter
- if type(lc) == "table" then
- lc = code
- end
- if type(uc) == "table" then
- uc = code
- end
- texsetlccode(code,lc,uc)
- if cc == "lu" then
- texsetsfcode(code,999)
- end
- elseif cc == "lo" then
+ local cc = chr.category
+ if is_letter[cc] then
local range = chr.range
if range then
for i=range.first,range.last do
texsetcatcode(i,11) -- letter
texsetlccode(i,i,i) -- self self
end
- else -- letter
- texsetcatcode(code,11)
- texsetlccode(code,code,code)
+ else
+ local lc, uc = chr.lccode, chr.uccode
+ if not lc then chr.lccode, lc = code, code end
+ if not uc then chr.uccode, uc = code, code end
+ texsetcatcode(code,11) -- letter
+ if type(lc) == "table" then
+ lc = code
+ end
+ if type(uc) == "table" then
+ uc = code
+ end
+ texsetlccode(code,lc,uc)
+ if cc == "lu" then
+ texsetsfcode(code,999)
+ end
end
- elseif cc == "mn" then -- mark
- texsetlccode(code,code,code)
+ elseif is_mark[cc] then
+ texsetlccode(code,code,code) -- for hyphenation
end
end
end
diff --git a/tex/context/base/char-utf.lua b/tex/context/base/char-utf.lua
index c509231e3..30124a6a2 100644
--- a/tex/context/base/char-utf.lua
+++ b/tex/context/base/char-utf.lua
@@ -20,7 +20,7 @@ over a string.
--ldx]]--
local utfchar, utfbyte, utfgsub = utf.char, utf.byte, utf.gsub
-local concat, gmatch, gsub = table.concat, string.gmatch, string.gsub
+local concat, gmatch, gsub, find = table.concat, string.gmatch, string.gsub, string.find
local utfcharacters, utfvalues = string.utfcharacters, string.utfvalues
local allocate = utilities.storage.allocate
@@ -34,6 +34,9 @@ local characters = characters
characters.graphemes = allocate()
local graphemes = characters.graphemes
+characters.decomposed = allocate()
+local decomposed = characters.decomposed
+
characters.mathpairs = allocate()
local mathpairs = characters.mathpairs
@@ -48,13 +51,34 @@ local utffilters = characters.filters.utf
source code to depend on collapsing.
--ldx]]--
+-- for the moment, will be entries in char-def.lua
+
+-- Maps a single ligature character onto the letters it stands for. The keys
+-- are built from code points on purpose: a literal ligature in the source is
+-- easily turned into its plain letters by normalizing tools, after which the
+-- key equals the value and can never match the one-character lookups that
+-- are done on this table.
+local decomposed = allocate {
+    [utfchar(0x0132)] = "IJ",  -- LATIN CAPITAL LIGATURE IJ
+    [utfchar(0x0133)] = "ij",  -- LATIN SMALL LIGATURE IJ
+    [utfchar(0x0587)] = "եւ",  -- ARMENIAN SMALL LIGATURE ECH YIWN
+    [utfchar(0xFB00)] = "ff",  -- LATIN SMALL LIGATURE FF
+    [utfchar(0xFB01)] = "fi",  -- LATIN SMALL LIGATURE FI
+    [utfchar(0xFB02)] = "fl",  -- LATIN SMALL LIGATURE FL
+    [utfchar(0xFB03)] = "ffi", -- LATIN SMALL LIGATURE FFI
+    [utfchar(0xFB04)] = "ffl", -- LATIN SMALL LIGATURE FFL
+    [utfchar(0xFB05)] = "ſt",  -- LATIN SMALL LIGATURE LONG S T
+    [utfchar(0xFB06)] = "st",  -- LATIN SMALL LIGATURE ST
+    [utfchar(0xFB13)] = "մն",  -- ARMENIAN SMALL LIGATURE MEN NOW
+    [utfchar(0xFB14)] = "մե",  -- ARMENIAN SMALL LIGATURE MEN ECH
+    [utfchar(0xFB15)] = "մի",  -- ARMENIAN SMALL LIGATURE MEN INI
+    [utfchar(0xFB16)] = "վն",  -- ARMENIAN SMALL LIGATURE VEW NOW
+    [utfchar(0xFB17)] = "մխ",  -- ARMENIAN SMALL LIGATURE MEN XEH
+}
+characters.decomposed = decomposed -- published so other modules can reach the table
+
local function initialize()
- for k,v in next, characters.data do
+ for unicode, v in next, characters.data do
-- using vs and first testing for length is faster (.02->.01 s)
local vs = v.specials
if vs and #vs == 3 and vs[1] == 'char' then
local one, two = vs[2], vs[3]
- local first, second, combined = utfchar(one), utfchar(two), utfchar(k)
+ local first, second, combined = utfchar(one), utfchar(two), utfchar(unicode)
local cgf = graphemes[first]
if not cgf then
cgf = { }
@@ -67,7 +91,7 @@ local function initialize()
mps = { }
mathpairs[two] = mps
end
- mps[one] = k
+ mps[one] = unicode -- here unicode
local mps = mathpairs[second]
if not mps then
mps = { }
@@ -75,6 +99,26 @@ local function initialize()
end
mps[first] = combined
end
+ -- else
+ -- local description = v.description
+ -- if find(description,"LIGATURE") then
+ -- if vs then
+ -- local t = { }
+ -- for i=2,#vs do
+ -- t[#t+1] = utfchar(vs[i])
+ -- end
+ -- decomposed[utfchar(unicode)] = concat(t)
+ -- else
+ -- local vs = v.shcode
+ -- if vs then
+ -- local t = { }
+ -- for i=1,#vs do
+ -- t[i] = utfchar(vs[i])
+ -- end
+ -- decomposed[utfchar(unicode)] = concat(t)
+ -- end
+ -- end
+ -- end
end
end
initialize = false
@@ -164,6 +208,113 @@ not collecting tokens is not only faster but also saves garbage collecting.
--ldx]]--
-- lpeg variant is not faster
+--
+-- I might use the combined loop (kept commented out below) for the
+-- filter some day.
+
+--~ function utffilters.collapse(str) -- not really tested (we could preallocate a table)
+--~ if str and str ~= "" then
+--~ local nstr = #str
+--~ if nstr > 1 then
+--~ if initialize then -- saves a call
+--~ initialize()
+--~ end
+--~ local tokens, t, first, done, n = { }, 0, false, false, 0
+--~ for second in utfcharacters(str) do
+--~ local dec = decomposed[second]
+--~ if dec then
+--~ if not done then
+--~ if n > 0 then
+--~ for s in utfcharacters(str) do
+--~ if n == 1 then
+--~ break
+--~ else
+--~ t = t + 1
+--~ tokens[t] = s
+--~ n = n - 1
+--~ end
+--~ end
+--~ end
+--~ done = true
+--~ elseif first then
+--~ t = t + 1
+--~ tokens[t] = first
+--~ end
+--~ t = t + 1
+--~ tokens[t] = dec
+--~ first = false
+--~ elseif done then
+--~ local crs = high[second]
+--~ if crs then
+--~ if first then
+--~ t = t + 1
+--~ tokens[t] = first
+--~ end
+--~ first = crs
+--~ else
+--~ local cgf = graphemes[first]
+--~ if cgf and cgf[second] then
+--~ first = cgf[second]
+--~ elseif first then
+--~ t = t + 1
+--~ tokens[t] = first
+--~ first = second
+--~ else
+--~ first = second
+--~ end
+--~ end
+--~ else
+--~ local crs = high[second]
+--~ if crs then
+--~ for s in utfcharacters(str) do
+--~ if n == 1 then
+--~ break
+--~ else
+--~ t = t + 1
+--~ tokens[t] = s
+--~ n = n - 1
+--~ end
+--~ end
+--~ if first then
+--~ t = t + 1
+--~ tokens[t] = first
+--~ end
+--~ first = crs
+--~ done = true
+--~ else
+--~ local cgf = graphemes[first]
+--~ if cgf and cgf[second] then
+--~ for s in utfcharacters(str) do
+--~ if n == 1 then
+--~ break
+--~ else
+--~ t = t + 1
+--~ tokens[t] = s
+--~ n = n - 1
+--~ end
+--~ end
+--~ first = cgf[second]
+--~ done = true
+--~ else
+--~ first = second
+--~ n = n + 1
+--~ end
+--~ end
+--~ end
+--~ end
+--~ if done then
+--~ if first then
+--~ t = t + 1
+--~ tokens[t] = first
+--~ end
+--~ return concat(tokens) -- seldom called
+--~ end
+--~ elseif nstr > 0 then
+--~ return high[str] or str
+--~ end
+--~ end
+--~ return str
+--~ end
function utffilters.collapse(str) -- not really tested (we could preallocate a table)
if str and str ~= "" then
@@ -203,7 +354,7 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t
else
t = t + 1
tokens[t] = s
- n = n -1
+ n = n - 1
end
end
if first then
@@ -221,7 +372,7 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t
else
t = t + 1
tokens[t] = s
- n = n -1
+ n = n - 1
end
end
first = cgf[second]
@@ -234,8 +385,10 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t
end
end
if done then
- t = t + 1
- tokens[t] = first
+ if first then
+ t = t + 1
+ tokens[t] = first
+ end
return concat(tokens) -- seldom called
end
elseif nstr > 0 then
@@ -245,11 +398,61 @@ function utffilters.collapse(str) -- not really tested (we could preallocate a t
return str
end
+-- Replaces every character of str that has an entry in the decomposed table
+-- by that entry and copies all other characters unchanged.
+--
+-- str    : the string to filter; nil and "" are returned as they are
+-- return : a newly built string when at least one character was replaced,
+--          otherwise str itself (no concatenation takes place then)
+function utffilters.decompose(str)
+    if str and str ~= "" then
+        local nstr = #str
+        if nstr > 1 then -- every key in decomposed is longer than one byte
+         -- if initialize then -- saves a call
+         --     initialize()
+         -- end
+            -- n counts the untouched characters seen before the first hit,
+            -- done tells that tokens is being filled
+            local tokens, t, done, n = { }, 0, false, 0
+            for s in utfcharacters(str) do
+                local dec = decomposed[s]
+                if dec then
+                    if not done then
+                        if n > 0 then
+                            -- first hit: copy all n leading characters; the
+                            -- loop has to run down to 0 (stopping at 1 would
+                            -- drop the character right before the hit)
+                            for c in utfcharacters(str) do
+                                if n == 0 then
+                                    break
+                                else
+                                    t = t + 1
+                                    tokens[t] = c
+                                    n = n - 1
+                                end
+                            end
+                        end
+                        done = true
+                    end
+                    t = t + 1
+                    tokens[t] = dec
+                elseif done then
+                    t = t + 1
+                    tokens[t] = s
+                else
+                    n = n + 1 -- nothing replaced so far, only remember how many to copy later
+                end
+            end
+            if done then
+                return concat(tokens) -- seldom called
+            end
+        end
+    end
+    return str
+end
+
local textfileactions = resolvers.openers.helpers.textfileactions
utilities.sequencers.appendaction (textfileactions,"system","characters.filters.utf.collapse")
utilities.sequencers.disableaction(textfileactions,"characters.filters.utf.collapse")
+utilities.sequencers.appendaction (textfileactions,"system","characters.filters.utf.decompose")
+utilities.sequencers.disableaction(textfileactions,"characters.filters.utf.decompose")
+
+-- Enables the collapse and the decompose action in the textfileactions
+-- sequence, in that order.
+function characters.filters.utf.enable()
+    local enableaction = utilities.sequencers.enableaction
+    enableaction(textfileactions,"characters.filters.utf.collapse")
+    enableaction(textfileactions,"characters.filters.utf.decompose")
+end
+
--[[ldx--
Next we implement some commands that are used in the user interface.
--ldx]]--
diff --git a/tex/context/base/char-utf.mkiv b/tex/context/base/char-utf.mkiv
index b59d2f569..261735656 100644
--- a/tex/context/base/char-utf.mkiv
+++ b/tex/context/base/char-utf.mkiv
@@ -30,9 +30,16 @@
% resolvers.filters.install('utf',characters.filters.utf.collapse)
+% \appendtoks
+% \ctxlua{
+% local textfileactions = resolvers.openers.helpers.textfileactions
+% utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.collapse")
+% utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.decompose")
+% }%
+% \to \everyjob
+
\appendtoks
- \ctxlua{utilities.sequencers.enableaction
- (resolvers.openers.textfileactions,"characters.filters.utf.collapse")}%
+ \ctxlua{characters.filters.utf.enable()}%
\to \everyjob
%D The next one influences input parsing.
diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf
index 30de4263a..5e38335cf 100644
Binary files a/tex/context/base/status-files.pdf and b/tex/context/base/status-files.pdf differ
diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf
index b6a7cce99..c34365ed2 100644
Binary files a/tex/context/base/status-lua.pdf and b/tex/context/base/status-lua.pdf differ
diff --git a/tex/generic/context/luatex-fonts-merged.lua b/tex/generic/context/luatex-fonts-merged.lua
index 11fc6e347..f6aeb6327 100644
--- a/tex/generic/context/luatex-fonts-merged.lua
+++ b/tex/generic/context/luatex-fonts-merged.lua
@@ -1,6 +1,6 @@
-- merged file : luatex-fonts-merged.lua
-- parent file : luatex-fonts.lua
--- merge date : 05/05/11 12:10:55
+-- merge date : 05/06/11 16:52:12
do -- begin closure to overcome local limits and interference
--
cgit v1.2.3