From 06c355066a4cf2af674302948c2f3caee06932f2 Mon Sep 17 00:00:00 2001
From: Hans Hagen
Date: Wed, 20 Oct 2010 21:33:00 +0200
Subject: beta 2010.10.20 21:33
---
tex/context/base/char-cmp.lua | 43 ++-----
tex/context/base/char-enc.lua | 2 +-
tex/context/base/char-ini.lua | 6 -
tex/context/base/char-tex.lua | 12 +-
tex/context/base/char-utf.lua | 1 -
tex/context/base/cont-new.tex | 2 +-
tex/context/base/context.tex | 2 +-
tex/context/base/enco-ini.mkiv | 2 +-
tex/context/base/l-table.lua | 20 ++--
tex/context/base/lang-wrd.lua | 169 ++++++++++++++++++----------
tex/context/base/lang-wrd.mkiv | 15 ++-
tex/context/base/node-aux.lua | 9 ++
tex/generic/context/luatex-fonts-merged.lua | 22 ++--
13 files changed, 178 insertions(+), 127 deletions(-)
(limited to 'tex')
diff --git a/tex/context/base/char-cmp.lua b/tex/context/base/char-cmp.lua
index e522226f8..2cd633370 100644
--- a/tex/context/base/char-cmp.lua
+++ b/tex/context/base/char-cmp.lua
@@ -6,15 +6,16 @@ if not modules then modules = { } end modules ['char-cmp'] = {
license = "see context related readme files"
}
+-- There is some overlap here with shcodes ...
+
local type = type
-local utf = unicode.utf8
-local utfchar = utf.char
+local utfchar, utfbyte = utf.char, utf.byte
local unpack = unpack or table.unpack
local allocate = utilities.storage.allocate
-characters = characters or { }
local characters = characters
+local chardata = characters.data
characters.uncomposed = allocate()
local uncomposed = characters.uncomposed
@@ -38,7 +39,7 @@ Of course they may come in handy elsewhere too. Using shcodes is
not handy here (incpmplete).
--ldx]]--
-uncomposed.left = allocate {
+local left = allocate {
AEligature = "A", aeligature = "a",
OEligature = "O", oeligature = "o",
IJligature = "I", ijligature = "i",
@@ -48,7 +49,7 @@ uncomposed.left = allocate {
Ssharp = "S", ssharp = "s",
}
-uncomposed.right = allocate {
+local right = allocate {
AEligature = "E", aeligature = "e",
OEligature = "E", oeligature = "e",
IJligature = "J", ijligature = "j",
@@ -58,7 +59,7 @@ uncomposed.right = allocate {
Ssharp = "S", ssharp = "s",
}
-uncomposed.both = allocate {
+local both = allocate {
Acircumflex = "A", acircumflex = "a",
Ccircumflex = "C", ccircumflex = "c",
Ecircumflex = "E", ecircumflex = "e",
@@ -177,24 +178,9 @@ uncomposed.both = allocate {
}
--- adobename ... inclomplete
---
--- if characters.data then
--- uncomposed.left, uncomposed.right, uncomposed.both = allocate(), allocate(), allocate()
--- for k,v in next, characters.data do
--- local s = v.shcode
--- if s then
--- local name = v.adobename
--- if not name then
--- -- table.print(v) -- only used for afm anyway
--- elseif type(s) == "table" then
--- uncomposed.left[name], uncomposed.right[name] = s[1], s[#s]
--- else
--- uncomposed.both[name] = s
--- end
--- end
--- end
--- end
+uncomposed.left = left
+uncomposed.right = right
+uncomposed.both = both
--[[ldx--
The following function is used in the indexing code, where we
@@ -202,16 +188,11 @@ need some sort of default fallback mapping. (This is obsolete!)
--ldx]]--
function characters.uncompose(n) -- n == string|number, returns string
- local cdn
- if type(n) == "string" then
- cdn = characters.data[utf.byte(n)]
- else
- cdn = characters.data[n]
- end
+ local cdn = type(n) == "string" and chardata[utfbyte(n)] or chardata[n]
if cdn then
local shcode = cdn.shcode
if not shcode then
- return uncomposed.both[cdn.contextname] or n
+ return both[cdn.contextname] or n
elseif type(shcode) == "table" then
return utfchar(unpack(cdn.shcode))
else
diff --git a/tex/context/base/char-enc.lua b/tex/context/base/char-enc.lua
index bdca9582c..4d7ceaa57 100644
--- a/tex/context/base/char-enc.lua
+++ b/tex/context/base/char-enc.lua
@@ -13,7 +13,7 @@ local allocate = utilities.storage.allocate
characters = characters or { }
local characters = characters
-characters.synonyms = allocate {
+characters.synonyms = allocate { -- afm mess
angle = 0x2220,
anticlockwise = 0x21BA,
arrowaxisleft = 0x2190,
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua
index f86eeaf66..6d58f6e98 100644
--- a/tex/context/base/char-ini.lua
+++ b/tex/context/base/char-ini.lua
@@ -582,12 +582,6 @@ else -- char-obs
end
-function characters.charcode(box)
- local b = tex.box[box]
- local l = b.list
- texsprint((l and l.id == node.id('glyph') and l.char) or 0)
-end
-
--[[ldx--
Setting the lccodes is also done in a loop over the data table.
--ldx]]--
diff --git a/tex/context/base/char-tex.lua b/tex/context/base/char-tex.lua
index 6e57a860a..538915dd3 100644
--- a/tex/context/base/char-tex.lua
+++ b/tex/context/base/char-tex.lua
@@ -6,9 +6,9 @@ if not modules then modules = { } end modules ['char-tex'] = {
license = "see context related readme files"
}
-local find = string.find
-
local lpeg = lpeg
+
+local find = string.find
local P, C, R, S, Cs, Cc = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc
local U, lpegmatch = lpeg.patterns.utf8, lpeg.match
@@ -77,13 +77,11 @@ local convert_accents_strip = Cs((no_l * accents * no_r + accents + P(1))^0)
local convert_commands_strip = Cs((no_l * commands * no_r + commands + P(1))^0)
function characters.tex.toutf(str,strip)
- if find(str,"\\") then -- we can start at teh found position
+ if find(str,"\\") then -- we can start at the found position
if strip then
- str = lpegmatch(convert_commands_strip,str)
- str = lpegmatch(convert_accents_strip,str)
+ return lpegmatch(convert_accents_strip,lpegmatch(convert_commands_strip,str))
else
- str = lpegmatch(convert_commands,str)
- str = lpegmatch(convert_accents,str)
+ return lpegmatch(convert_accents, lpegmatch(convert_commands, str))
end
end
return str
diff --git a/tex/context/base/char-utf.lua b/tex/context/base/char-utf.lua
index d8ffdeed0..25c072dff 100644
--- a/tex/context/base/char-utf.lua
+++ b/tex/context/base/char-utf.lua
@@ -19,7 +19,6 @@ in special kinds of output (for instance ).
over a string.
--ldx]]--
-local utf = unicode.utf8
local utfchar, utfbyte, utfgsub = utf.char, utf.byte, utf.gsub
local concat, gmatch, gsub = table.concat, string.gmatch, string.gsub
local utfcharacters, utfvalues = string.utfcharacters, string.utfvalues
diff --git a/tex/context/base/cont-new.tex b/tex/context/base/cont-new.tex
index b8f5f2dff..0a5b52216 100644
--- a/tex/context/base/cont-new.tex
+++ b/tex/context/base/cont-new.tex
@@ -11,7 +11,7 @@
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.
-\newcontextversion{2010.10.20 13:11}
+\newcontextversion{2010.10.20 21:33}
%D This file is loaded at runtime, thereby providing an
%D excellent place for hacks, patches, extensions and new
diff --git a/tex/context/base/context.tex b/tex/context/base/context.tex
index 77f42b1ab..8562f9d56 100644
--- a/tex/context/base/context.tex
+++ b/tex/context/base/context.tex
@@ -20,7 +20,7 @@
%D your styles an modules.
\edef\contextformat {\jobname}
-\edef\contextversion{2010.10.20 13:11}
+\edef\contextversion{2010.10.20 21:33}
%D For those who want to use this:
diff --git a/tex/context/base/enco-ini.mkiv b/tex/context/base/enco-ini.mkiv
index da1892faf..70cbd2ce0 100644
--- a/tex/context/base/enco-ini.mkiv
+++ b/tex/context/base/enco-ini.mkiv
@@ -100,7 +100,7 @@
\unexpanded\def\buildtextaccent#1#2%
{\begingroup
\global\setbox\accenttestbox\hbox{#1}%
- \scratchcounter\ctxlua{characters.charcode(\number\accenttestbox)}%
+ \scratchcounter\cldcontext{nodes.firstcharinbox(\number\accenttestbox)}%
\ifcase\scratchcounter\else\accent\scratchcounter\fi
\relax#2%
\endgroup}
diff --git a/tex/context/base/l-table.lua b/tex/context/base/l-table.lua
index b661e7aaa..4be077dfa 100644
--- a/tex/context/base/l-table.lua
+++ b/tex/context/base/l-table.lua
@@ -332,21 +332,25 @@ local function do_serialize(root,name,depth,level,indexed)
depth = depth .. " "
if indexed then
handle(format("%s{",depth))
- elseif name then
- --~ handle(format("%s%s={",depth,key(name)))
- if type(name) == "number" then -- or find(k,"^%d+$") then
+ else
+ local tn = type(name)
+ if tn == "number" then -- or find(k,"^%d+$") then
if hexify then
handle(format("%s[0x%04X]={",depth,name))
else
handle(format("%s[%s]={",depth,name))
end
- elseif noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then
- handle(format("%s%s={",depth,name))
+ elseif tn == "string" then
+ if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then
+ handle(format("%s%s={",depth,name))
+ else
+ handle(format("%s[%q]={",depth,name))
+ end
+ elseif tn == "boolean" then
+ handle(format("%s[%s]={",depth,tostring(name)))
else
- handle(format("%s[%q]={",depth,name))
+ handle(format("%s{",depth))
end
- else
- handle(format("%s{",depth))
end
end
-- we could check for k (index) being number (cardinal)
diff --git a/tex/context/base/lang-wrd.lua b/tex/context/base/lang-wrd.lua
index 9efde5a05..4d131f45a 100644
--- a/tex/context/base/lang-wrd.lua
+++ b/tex/context/base/lang-wrd.lua
@@ -37,6 +37,9 @@ local disc_code = nodecodes.disc
local kern_code = nodecodes.kern
local kerning_code = kerncodes.kerning
+local lowerchar = characters.lower
+
+local a_color = attributes.private('color')
words.colors = {
["known"] = "green",
@@ -84,11 +87,12 @@ end
-- hyphenating and spell checking.
local function mark_words(head,whenfound) -- can be optimized
- local current, start, str, language, n = head, nil, "", nil, 0
+ local current, start, str, language, n, done = head, nil, "", nil, 0, false
local function action()
if #str > 0 then
local f = whenfound(language,str)
if f then
+ done = true
for i=1,n do
f(start)
start = start.next
@@ -144,86 +148,135 @@ local function mark_words(head,whenfound) -- can be optimized
if start then
action()
end
- return head
+ return head, done
end
-words.methods = { }
-local methods = words.methods
+local methods = { }
+words.methods = methods
+
+local enablers = { }
+words.enablers = enablers
local wordmethod = 1
+local enabled = false
-methods[1] = function(head, attribute, yes, nop)
- local right, wrong = false, false
- if yes then right = function(n) set_attribute(n,attribute,yes) end end
- if nop then wrong = function(n) set_attribute(n,attribute,nop) end end
- for n in traverse_nodes(head) do
- unset_attribute(n,attribute) -- hm, not that selective (reset color)
+function words.check(head)
+ if enabled and head.next then
+ return methods[wordmethod](head)
+ else
+ return head, false
end
- local found, done = words.found, false
- mark_words(head, function(language,str)
- if #str < words.threshold then
- return false
- elseif found(language,str) then
- done = true
- return right
- else
- done = true
- return wrong
- end
- end)
- return head, done
end
-local list = { } -- todo: per language
-
-local lowerchar = characters.lower
+function words.enable(settings)
+ local method = settings.method
+ wordmethod = method and tonumber(method) or wordmethod or 1
+ local e = enablers[wordmethod]
+ if e then e(settings) end
+ tasks.enableaction("processors","languages.words.check")
+ enabled = true
+end
-methods[2] = function(head, attribute)
- dump = true
- mark_words(head, function(language,str)
- if #str >= words.threshold then
- str = lowerchar(str)
- list[str] = (list[str] or 0) + 1
- end
- end)
- return head, true
+function words.disable()
+ enabled = false
end
--- words.used = list
+-- method 1
-directives.register("languages.words.dump", function(v)
- local name = type(v) == "string" and v ~= "" and v or file.addsuffix(tex.jobname,"words")
- local function dumpusedwords(name)
- report_languages("saving list of used words in '%s'",name)
- io.savedata(name,table.serialize(list))
+local colors = words.colors
+local colist = attributes.list[a_color]
+
+local right = function(n) set_attribute(n,a_color,colist[colors.known]) end
+local wrong = function(n) set_attribute(n,a_color,colist[colors.unknown]) end
+
+local function sweep(language,str)
+ if #str < words.threshold then
+ return false
+ elseif words.found(language,str) then
+ return right
+ else
+ return wrong
+ end
+end
+
+methods[1] = function(head) -- colors known/unknown words; returns head, done (from mark_words)
+ for n in traverse_nodes(head) do
+ unset_attribute(n,a_color) -- reset color first (hm, not that selective); 'attribute' is no longer a parameter
end
- luatex.registerstopactions(dumpusedwords)
-end )
+ return mark_words(head,sweep)
+end
-local color = attributes.private('color')
+-- method 2
-local enabled = false
+local dumpname = nil
+local dumpthem = false
+local listname = "document"
-function words.check(head)
- if enabled and head.next then
- local colors = words.colors
- local alc = attributes.list[color]
- return methods[wordmethod](head, color, alc[colors.known], alc[colors.unknown])
- else
- return head, false
+local category = { }
+
+local collected = {
+ total = 0,
+ categories = { document = { total = 0, list = { } } },
+}
+
+enablers[2] = function(settings)
+ local name = settings.list
+ listname = name and name ~= "" and name or "document"
+ category = collected.categories[listname]
+ if not category then
+ category = { }
+ collected.categories[listname] = category
end
end
-function words.enable(method)
- tasks.enableaction("processors","languages.words.check")
- wordmethod = method or wordmethod or 1
- enabled = true
+local numbers = languages.numbers
+local registered = languages.registered
+
+local function sweep(language,str)
+ if #str >= words.threshold then
+ collected.total = collected.total + 1
+ str = lowerchar(str)
+ local number = numbers[language] or "unset"
+ local words = category[number]
+ if not words then
+ local r = registered[number]
+ category[number] = {
+ number = language,
+ parent = r and r.parent or nil,
+ patterns = r and r.patterns or nil,
+ tag = r and r.tag or nil,
+ list = { [str] = 1 },
+ total = 1,
+ }
+ else
+ local list = words.list
+ list[str] = (list[str] or 0) + 1
+ words.total = words.total + 1
+ end
+ end
end
-function words.disable()
- enabled = false
+methods[2] = function(head)
+ dumpthem = true
+ return mark_words(head,sweep)
end
+local function dumpusedwords()
+ if dumpthem then
+ collected.threshold = words.threshold
+ dumpname = dumpname or file.addsuffix(tex.jobname,"words")
+ report_languages("saving list of used words in '%s'",dumpname)
+ io.savedata(dumpname,table.serialize(collected,true))
+ -- table.tofile(dumpname,list,true)
+ end
+end
+
+directives.register("languages.words.dump", function(v)
+ dumpname = type(v) == "string" and v ~= "" and v
+end)
+
+luatex.registerstopactions(dumpusedwords)
+
-- for the moment we hook it into the attribute handler
--~ languagehacks = { }
diff --git a/tex/context/base/lang-wrd.mkiv b/tex/context/base/lang-wrd.mkiv
index a706c21a7..9b149462a 100644
--- a/tex/context/base/lang-wrd.mkiv
+++ b/tex/context/base/lang-wrd.mkiv
@@ -37,14 +37,23 @@
\unexpanded\def\setupspellchecking
{\dosingleargument\dosetupspellchecking}
+\newtoks\everysetupspellchecking
+
\unexpanded\def\setupspellchecking[#1]% todo colors
{\getparameters[\??wl][#1]%
+ \the\everysetupspellchecking}
+
+\appendtoks
\doifelse\@@wlstate\v!start
- {\ctxlua{languages.words.enable(\@@wlmethod)}}
- {\ctxlua{languages.words.disable()}}}
+ {\ctxlua{languages.words.enable { method = "\@@wlmethod", list = "\@@wllist" }}}
+ {\ctxlua{languages.words.disable()}}%
+\to \everysetupspellchecking
+
+% beware, maybe some day we will honour grouping
\setupspellchecking
[\c!state=\v!stop,
- \c!method=1]
+ \c!method=1,
+ \c!list=]
\protect \endinput
diff --git a/tex/context/base/node-aux.lua b/tex/context/base/node-aux.lua
index 58049f020..0d4ab665d 100644
--- a/tex/context/base/node-aux.lua
+++ b/tex/context/base/node-aux.lua
@@ -20,6 +20,9 @@ local has_attribute = node.has_attribute
local set_attribute = node.set_attribute
local get_attribute = node.get_attribute
local unset_attribute = node.unset_attribute
+local first_character = node.first_character
+
+local texbox = tex.box
function nodes.repack_hlist(list,...)
local temp, b = hpack_nodes(list,...)
@@ -153,3 +156,9 @@ nodes.unset_attributes = unset_attributes
-- return -u
-- end
-- end
+
+function nodes.firstcharinbox(n) -- char code of the first character node in box n, or 0 if none
+ local b = texbox[n] -- guard: presumably nil for a void box register (confirm against LuaTeX manual)
+ local f = b and b.list and first_character(b.list)
+ return f and f.char or 0
+end
diff --git a/tex/generic/context/luatex-fonts-merged.lua b/tex/generic/context/luatex-fonts-merged.lua
index 55d4883eb..83ca1c35c 100644
--- a/tex/generic/context/luatex-fonts-merged.lua
+++ b/tex/generic/context/luatex-fonts-merged.lua
@@ -1,6 +1,6 @@
-- merged file : luatex-fonts-merged.lua
-- parent file : luatex-fonts.lua
--- merge date : 10/20/10 13:11:27
+-- merge date : 10/20/10 21:33:36
do -- begin closure to overcome local limits and interference
@@ -969,21 +969,25 @@ local function do_serialize(root,name,depth,level,indexed)
depth = depth .. " "
if indexed then
handle(format("%s{",depth))
- elseif name then
- --~ handle(format("%s%s={",depth,key(name)))
- if type(name) == "number" then -- or find(k,"^%d+$") then
+ else
+ local tn = type(name)
+ if tn == "number" then -- or find(k,"^%d+$") then
if hexify then
handle(format("%s[0x%04X]={",depth,name))
else
handle(format("%s[%s]={",depth,name))
end
- elseif noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then
- handle(format("%s%s={",depth,name))
+ elseif tn == "string" then
+ if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then
+ handle(format("%s%s={",depth,name))
+ else
+ handle(format("%s[%q]={",depth,name))
+ end
+ elseif tn == "boolean" then
+ handle(format("%s[%s]={",depth,tostring(name)))
else
- handle(format("%s[%q]={",depth,name))
+ handle(format("%s{",depth))
end
- else
- handle(format("%s{",depth))
end
end
-- we could check for k (index) being number (cardinal)
--
cgit v1.2.3