From 06c355066a4cf2af674302948c2f3caee06932f2 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Wed, 20 Oct 2010 21:33:00 +0200 Subject: beta 2010.10.20 21:33 --- scripts/context/lua/mtxrun.lua | 19 ++-- scripts/context/stubs/mswin/mtxrun.lua | 19 ++-- scripts/context/stubs/unix/mtxrun | 19 ++-- tex/context/base/char-cmp.lua | 43 ++----- tex/context/base/char-enc.lua | 2 +- tex/context/base/char-ini.lua | 6 - tex/context/base/char-tex.lua | 12 +- tex/context/base/char-utf.lua | 1 - tex/context/base/cont-new.tex | 2 +- tex/context/base/context.tex | 2 +- tex/context/base/enco-ini.mkiv | 2 +- tex/context/base/l-table.lua | 20 ++-- tex/context/base/lang-wrd.lua | 169 ++++++++++++++++++---------- tex/context/base/lang-wrd.mkiv | 15 ++- tex/context/base/node-aux.lua | 9 ++ tex/generic/context/luatex-fonts-merged.lua | 22 ++-- 16 files changed, 214 insertions(+), 148 deletions(-) diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index b792e35af..00d4fe9c3 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -848,20 +848,25 @@ local function do_serialize(root,name,depth,level,indexed) depth = depth .. " " if indexed then handle(format("%s{",depth)) - elseif name then - if type(name) == "number" then -- or find(k,"^%d+$") then + else + local tn = type(name) + if tn == "number" then -- or find(k,"^%d+$") then if hexify then handle(format("%s[0x%04X]={",depth,name)) else handle(format("%s[%s]={",depth,name)) end - elseif noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then - handle(format("%s%s={",depth,name)) + elseif tn == "string" then + if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then + handle(format("%s%s={",depth,name)) + else + handle(format("%s[%q]={",depth,name)) + end + elseif tn == "boolean" then + handle(format("%s[%s]={",depth,tostring(name))) else - handle(format("%s[%q]={",depth,name)) + handle(format("%s{",depth)) end - else - handle(format("%s{",depth)) end end -- we could check for k (index) being number (cardinal) diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua index b792e35af..00d4fe9c3 100644 --- a/scripts/context/stubs/mswin/mtxrun.lua +++ b/scripts/context/stubs/mswin/mtxrun.lua @@ -848,20 +848,25 @@ local function do_serialize(root,name,depth,level,indexed) depth = depth .. " " if indexed then handle(format("%s{",depth)) - elseif name then - if type(name) == "number" then -- or find(k,"^%d+$") then + else + local tn = type(name) + if tn == "number" then -- or find(k,"^%d+$") then if hexify then handle(format("%s[0x%04X]={",depth,name)) else handle(format("%s[%s]={",depth,name)) end - elseif noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then - handle(format("%s%s={",depth,name)) + elseif tn == "string" then + if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then + handle(format("%s%s={",depth,name)) + else + handle(format("%s[%q]={",depth,name)) + end + elseif tn == "boolean" then + handle(format("%s[%s]={",depth,tostring(name))) else - handle(format("%s[%q]={",depth,name)) + handle(format("%s{",depth)) end - else - handle(format("%s{",depth)) end end -- we could check for k (index) being number (cardinal) diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun index b792e35af..00d4fe9c3 100755 --- a/scripts/context/stubs/unix/mtxrun +++ b/scripts/context/stubs/unix/mtxrun @@ -848,20 +848,25 @@ local function do_serialize(root,name,depth,level,indexed) depth = depth .. " " if indexed then handle(format("%s{",depth)) - elseif name then - if type(name) == "number" then -- or find(k,"^%d+$") then + else + local tn = type(name) + if tn == "number" then -- or find(k,"^%d+$") then if hexify then handle(format("%s[0x%04X]={",depth,name)) else handle(format("%s[%s]={",depth,name)) end - elseif noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then - handle(format("%s%s={",depth,name)) + elseif tn == "string" then + if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then + handle(format("%s%s={",depth,name)) + else + handle(format("%s[%q]={",depth,name)) + end + elseif tn == "boolean" then + handle(format("%s[%s]={",depth,tostring(name))) else - handle(format("%s[%q]={",depth,name)) + handle(format("%s{",depth)) end - else - handle(format("%s{",depth)) end end -- we could check for k (index) being number (cardinal) diff --git a/tex/context/base/char-cmp.lua b/tex/context/base/char-cmp.lua index e522226f8..2cd633370 100644 --- a/tex/context/base/char-cmp.lua +++ b/tex/context/base/char-cmp.lua @@ -6,15 +6,16 @@ if not modules then modules = { } end modules ['char-cmp'] = { license = "see context related readme files" } +-- There is some overlap here with shcodes ... + local type = type -local utf = unicode.utf8 -local utfchar = utf.char +local utfchar, utfbyte = utf.char, utf.byte local unpack = unpack or table.unpack local allocate = utilities.storage.allocate -characters = characters or { } local characters = characters +local chardata = characters.data characters.uncomposed = allocate() local uncomposed = characters.uncomposed @@ -38,7 +39,7 @@ Of course they may come in handy elsewhere too. Using shcodes is not handy here (incpmplete).

--ldx]]-- -uncomposed.left = allocate { +local left = allocate { AEligature = "A", aeligature = "a", OEligature = "O", oeligature = "o", IJligature = "I", ijligature = "i", @@ -48,7 +49,7 @@ uncomposed.left = allocate { Ssharp = "S", ssharp = "s", } -uncomposed.right = allocate { +local right = allocate { AEligature = "E", aeligature = "e", OEligature = "E", oeligature = "e", IJligature = "J", ijligature = "j", @@ -58,7 +59,7 @@ uncomposed.right = allocate { Ssharp = "S", ssharp = "s", } -uncomposed.both = allocate { +local both = allocate { Acircumflex = "A", acircumflex = "a", Ccircumflex = "C", ccircumflex = "c", Ecircumflex = "E", ecircumflex = "e", @@ -177,24 +178,9 @@ uncomposed.both = allocate { } --- adobename ... inclomplete --- --- if characters.data then --- uncomposed.left, uncomposed.right, uncomposed.both = allocate(), allocate(), allocate() --- for k,v in next, characters.data do --- local s = v.shcode --- if s then --- local name = v.adobename --- if not name then --- -- table.print(v) -- only used for afm anyway --- elseif type(s) == "table" then --- uncomposed.left[name], uncomposed.right[name] = s[1], s[#s] --- else --- uncomposed.both[name] = s --- end --- end --- end --- end +uncomposed.left = left +uncomposed.right = right +uncomposed.both = both --[[ldx--

The following function is used in the indexing code, where we @@ -202,16 +188,11 @@ need some sort of default fallback mapping. (This is obsolete!)

--ldx]]-- function characters.uncompose(n) -- n == string|number, returns string - local cdn - if type(n) == "string" then - cdn = characters.data[utf.byte(n)] - else - cdn = characters.data[n] - end + local cdn = type(n) == "string" and chardata[utfbyte(n)] or chardata[n] if cdn then local shcode = cdn.shcode if not shcode then - return uncomposed.both[cdn.contextname] or n + return both[cdn.contextname] or n elseif type(shcode) == "table" then return utfchar(unpack(cdn.shcode)) else diff --git a/tex/context/base/char-enc.lua b/tex/context/base/char-enc.lua index bdca9582c..4d7ceaa57 100644 --- a/tex/context/base/char-enc.lua +++ b/tex/context/base/char-enc.lua @@ -13,7 +13,7 @@ local allocate = utilities.storage.allocate characters = characters or { } local characters = characters -characters.synonyms = allocate { +characters.synonyms = allocate { -- afm mess angle = 0x2220, anticlockwise = 0x21BA, arrowaxisleft = 0x2190, diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua index f86eeaf66..6d58f6e98 100644 --- a/tex/context/base/char-ini.lua +++ b/tex/context/base/char-ini.lua @@ -582,12 +582,6 @@ else -- char-obs end -function characters.charcode(box) - local b = tex.box[box] - local l = b.list - texsprint((l and l.id == node.id('glyph') and l.char) or 0) -end - --[[ldx--

Setting the lccodes is also done in a loop over the data table.

--ldx]]-- diff --git a/tex/context/base/char-tex.lua b/tex/context/base/char-tex.lua index 6e57a860a..538915dd3 100644 --- a/tex/context/base/char-tex.lua +++ b/tex/context/base/char-tex.lua @@ -6,9 +6,9 @@ if not modules then modules = { } end modules ['char-tex'] = { license = "see context related readme files" } -local find = string.find - local lpeg = lpeg + +local find = string.find local P, C, R, S, Cs, Cc = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc local U, lpegmatch = lpeg.patterns.utf8, lpeg.match @@ -77,13 +77,11 @@ local convert_accents_strip = Cs((no_l * accents * no_r + accents + P(1))^0) local convert_commands_strip = Cs((no_l * commands * no_r + commands + P(1))^0) function characters.tex.toutf(str,strip) - if find(str,"\\") then -- we can start at teh found position + if find(str,"\\") then -- we can start at the found position if strip then - str = lpegmatch(convert_commands_strip,str) - str = lpegmatch(convert_accents_strip,str) + return lpegmatch(convert_accents_strip,lpegmatch(convert_commands_strip,str)) else - str = lpegmatch(convert_commands,str) - str = lpegmatch(convert_accents,str) + return lpegmatch(convert_accents, lpegmatch(convert_commands, str)) end end return str diff --git a/tex/context/base/char-utf.lua b/tex/context/base/char-utf.lua index d8ffdeed0..25c072dff 100644 --- a/tex/context/base/char-utf.lua +++ b/tex/context/base/char-utf.lua @@ -19,7 +19,6 @@ in special kinds of output (for instance ).

over a string.

--ldx]]-- -local utf = unicode.utf8 local utfchar, utfbyte, utfgsub = utf.char, utf.byte, utf.gsub local concat, gmatch, gsub = table.concat, string.gmatch, string.gsub local utfcharacters, utfvalues = string.utfcharacters, string.utfvalues diff --git a/tex/context/base/cont-new.tex b/tex/context/base/cont-new.tex index b8f5f2dff..0a5b52216 100644 --- a/tex/context/base/cont-new.tex +++ b/tex/context/base/cont-new.tex @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2010.10.20 13:11} +\newcontextversion{2010.10.20 21:33} %D This file is loaded at runtime, thereby providing an %D excellent place for hacks, patches, extensions and new diff --git a/tex/context/base/context.tex b/tex/context/base/context.tex index 77f42b1ab..8562f9d56 100644 --- a/tex/context/base/context.tex +++ b/tex/context/base/context.tex @@ -20,7 +20,7 @@ %D your styles an modules. \edef\contextformat {\jobname} -\edef\contextversion{2010.10.20 13:11} +\edef\contextversion{2010.10.20 21:33} %D For those who want to use this: diff --git a/tex/context/base/enco-ini.mkiv b/tex/context/base/enco-ini.mkiv index da1892faf..70cbd2ce0 100644 --- a/tex/context/base/enco-ini.mkiv +++ b/tex/context/base/enco-ini.mkiv @@ -100,7 +100,7 @@ \unexpanded\def\buildtextaccent#1#2% {\begingroup \global\setbox\accenttestbox\hbox{#1}% - \scratchcounter\ctxlua{characters.charcode(\number\accenttestbox)}% + \scratchcounter\cldcontext{nodes.firstcharinbox(\number\accenttestbox)}% \ifcase\scratchcounter\else\accent\scratchcounter\fi \relax#2% \endgroup} diff --git a/tex/context/base/l-table.lua b/tex/context/base/l-table.lua index b661e7aaa..4be077dfa 100644 --- a/tex/context/base/l-table.lua +++ b/tex/context/base/l-table.lua @@ -332,21 +332,25 @@ local function do_serialize(root,name,depth,level,indexed) depth = depth .. " " if indexed then handle(format("%s{",depth)) - elseif name then - --~ handle(format("%s%s={",depth,key(name))) - if type(name) == "number" then -- or find(k,"^%d+$") then + else + local tn = type(name) + if tn == "number" then -- or find(k,"^%d+$") then if hexify then handle(format("%s[0x%04X]={",depth,name)) else handle(format("%s[%s]={",depth,name)) end - elseif noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then - handle(format("%s%s={",depth,name)) + elseif tn == "string" then + if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then + handle(format("%s%s={",depth,name)) + else + handle(format("%s[%q]={",depth,name)) + end + elseif tn == "boolean" then + handle(format("%s[%s]={",depth,tostring(name))) else - handle(format("%s[%q]={",depth,name)) + handle(format("%s{",depth)) end - else - handle(format("%s{",depth)) end end -- we could check for k (index) being number (cardinal) diff --git a/tex/context/base/lang-wrd.lua b/tex/context/base/lang-wrd.lua index 9efde5a05..4d131f45a 100644 --- a/tex/context/base/lang-wrd.lua +++ b/tex/context/base/lang-wrd.lua @@ -37,6 +37,9 @@ local disc_code = nodecodes.disc local kern_code = nodecodes.kern local kerning_code = kerncodes.kerning +local lowerchar = characters.lower + +local a_color = attributes.private('color') words.colors = { ["known"] = "green", @@ -84,11 +87,12 @@ end -- hyphenating and spell checking. local function mark_words(head,whenfound) -- can be optimized - local current, start, str, language, n = head, nil, "", nil, 0 + local current, start, str, language, n, done = head, nil, "", nil, 0, false local function action() if #str > 0 then local f = whenfound(language,str) if f then + done = true for i=1,n do f(start) start = start.next @@ -144,86 +148,135 @@ local function mark_words(head,whenfound) -- can be optimized if start then action() end - return head + return head, done end -words.methods = { } -local methods = words.methods +local methods = { } +words.methods = methods + +local enablers = { } +words.enablers = enablers local wordmethod = 1 +local enabled = false -methods[1] = function(head, attribute, yes, nop) - local right, wrong = false, false - if yes then right = function(n) set_attribute(n,attribute,yes) end end - if nop then wrong = function(n) set_attribute(n,attribute,nop) end end - for n in traverse_nodes(head) do - unset_attribute(n,attribute) -- hm, not that selective (reset color) +function words.check(head) + if enabled and head.next then + return methods[wordmethod](head) + else + return head, false end - local found, done = words.found, false - mark_words(head, function(language,str) - if #str < words.threshold then - return false - elseif found(language,str) then - done = true - return right - else - done = true - return wrong - end - end) - return head, done end -local list = { } -- todo: per language - -local lowerchar = characters.lower +function words.enable(settings) + local method = settings.method + wordmethod = method and tonumber(method) or wordmethod or 1 + local e = enablers[wordmethod] + if e then e(settings) end + tasks.enableaction("processors","languages.words.check") + enabled = true +end -methods[2] = function(head, attribute) - dump = true - mark_words(head, function(language,str) - if #str >= words.threshold then - str = lowerchar(str) - list[str] = (list[str] or 0) + 1 - end - end) - return head, true +function words.disable() + enabled = false end --- words.used = list +-- method 1 -directives.register("languages.words.dump", function(v) - local name = type(v) == "string" and v ~= "" and v or file.addsuffix(tex.jobname,"words") - local function dumpusedwords(name) - report_languages("saving list of used words in '%s'",name) - io.savedata(name,table.serialize(list)) +local colors = words.colors +local colist = attributes.list[a_color] + +local right = function(n) set_attribute(n,a_color,colist[colors.known]) end +local wrong = function(n) set_attribute(n,a_color,colist[colors.unknown]) end + +local function sweep(language,str) + if #str < words.threshold then + return false + elseif words.found(language,str) then + return right + else + return wrong + end +end + +methods[1] = function(head) + for n in traverse_nodes(head) do + unset_attribute(n,attribute) -- hm, not that selective (reset color) end - luatex.registerstopactions(dumpusedwords) -end ) + return mark_words(head,sweep) +end -local color = attributes.private('color') +-- method 2 -local enabled = false +local dumpname = nil +local dumpthem = false +local listname = "document" -function words.check(head) - if enabled and head.next then - local colors = words.colors - local alc = attributes.list[color] - return methods[wordmethod](head, color, alc[colors.known], alc[colors.unknown]) - else - return head, false +local category = { } + +local collected = { + total = 0, + categories = { document = { total = 0, list = { } } }, +} + +enablers[2] = function(settings) + local name = settings.list + listname = name and name ~= "" and name or "document" + category = collected.categories[listname] + if not category then + category = { } + collected.categories[listname] = category end end -function words.enable(method) - tasks.enableaction("processors","languages.words.check") - wordmethod = method or wordmethod or 1 - enabled = true +local numbers = languages.numbers +local registered = languages.registered + +local function sweep(language,str) + if #str >= words.threshold then + collected.total = collected.total + 1 + str = lowerchar(str) + local number = numbers[language] or "unset" + local words = category[number] + if not words then + local r = registered[number] + category[number] = { + number = language, + parent = r and r.parent or nil, + patterns = r and r.patterns or nil, + tag = r and r.tag or nil, + list = { [str] = 1 }, + total = 1, + } + else + local list = words.list + list[str] = (list[str] or 0) + 1 + words.total = words.total + 1 + end + end end -function words.disable() - enabled = false +methods[2] = function(head) + dumpthem = true + return mark_words(head,sweep) end +local function dumpusedwords() + if dumpthem then + collected.threshold = words.threshold + dumpname = dumpname or file.addsuffix(tex.jobname,"words") + report_languages("saving list of used words in '%s'",dumpname) + io.savedata(dumpname,table.serialize(collected,true)) + -- table.tofile(dumpname,list,true) + end +end + +directives.register("languages.words.dump", function(v) + dumpname = type(v) == "string" and v ~= "" and v +end) + +luatex.registerstopactions(dumpusedwords) + -- for the moment we hook it into the attribute handler --~ languagehacks = { } diff --git a/tex/context/base/lang-wrd.mkiv b/tex/context/base/lang-wrd.mkiv index a706c21a7..9b149462a 100644 --- a/tex/context/base/lang-wrd.mkiv +++ b/tex/context/base/lang-wrd.mkiv @@ -37,14 +37,23 @@ \unexpanded\def\setupspellchecking {\dosingleargument\dosetupspellchecking} +\newtoks\everysetupspellchecking + \unexpanded\def\setupspellchecking[#1]% todo colors {\getparameters[\??wl][#1]% + \the\everysetupspellchecking} + +\appendtoks \doifelse\@@wlstate\v!start - {\ctxlua{languages.words.enable(\@@wlmethod)}} - {\ctxlua{languages.words.disable()}}} + {\ctxlua{languages.words.enable { method = "\@@wlmethod", list = "\@@wllist" }}} + {\ctxlua{languages.words.disable()}}% +\to \everysetupspellchecking + +% beware, maybe some day we will honour grouping \setupspellchecking [\c!state=\v!stop, - \c!method=1] + \c!method=1, + \c!list=] \protect \endinput diff --git a/tex/context/base/node-aux.lua b/tex/context/base/node-aux.lua index 58049f020..0d4ab665d 100644 --- a/tex/context/base/node-aux.lua +++ b/tex/context/base/node-aux.lua @@ -20,6 +20,9 @@ local has_attribute = node.has_attribute local set_attribute = node.set_attribute local get_attribute = node.get_attribute local unset_attribute = node.unset_attribute +local first_character = node.first_character + +local texbox = tex.box function nodes.repack_hlist(list,...) local temp, b = hpack_nodes(list,...) @@ -153,3 +156,9 @@ nodes.unset_attributes = unset_attributes -- return -u -- end -- end + +function nodes.firstcharinbox(n) + local l = texbox[n].list + local f = l and first_character(l) + return f and f.char or 0 +end diff --git a/tex/generic/context/luatex-fonts-merged.lua b/tex/generic/context/luatex-fonts-merged.lua index 55d4883eb..83ca1c35c 100644 --- a/tex/generic/context/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 10/20/10 13:11:27 +-- merge date : 10/20/10 21:33:36 do -- begin closure to overcome local limits and interference @@ -969,21 +969,25 @@ local function do_serialize(root,name,depth,level,indexed) depth = depth .. " " if indexed then handle(format("%s{",depth)) - elseif name then - --~ handle(format("%s%s={",depth,key(name))) - if type(name) == "number" then -- or find(k,"^%d+$") then + else + local tn = type(name) + if tn == "number" then -- or find(k,"^%d+$") then if hexify then handle(format("%s[0x%04X]={",depth,name)) else handle(format("%s[%s]={",depth,name)) end - elseif noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then - handle(format("%s%s={",depth,name)) + elseif tn == "string" then + if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then + handle(format("%s%s={",depth,name)) + else + handle(format("%s[%q]={",depth,name)) + end + elseif tn == "boolean" then + handle(format("%s[%s]={",depth,tostring(name))) else - handle(format("%s[%q]={",depth,name)) + handle(format("%s{",depth)) end - else - handle(format("%s{",depth)) end end -- we could check for k (index) being number (cardinal) -- cgit v1.2.3