summaryrefslogtreecommitdiff
path: root/tex
diff options
context:
space:
mode:
Diffstat (limited to 'tex')
-rw-r--r--tex/context/base/char-def.lua3
-rw-r--r--tex/context/base/char-enc.lua11
-rw-r--r--tex/context/base/char-fio.lua56
-rw-r--r--tex/context/base/char-ini.lua452
-rw-r--r--tex/context/base/char-ini.mkiv4
-rw-r--r--tex/context/base/char-tex.lua540
-rw-r--r--tex/context/base/char-utf.lua298
-rw-r--r--tex/context/base/char-utf.mkiv13
-rw-r--r--tex/context/base/cont-new.mkiv2
-rw-r--r--tex/context/base/context-version.pdfbin4439 -> 4433 bytes
-rw-r--r--tex/context/base/context.mkiv8
-rw-r--r--tex/context/base/font-enc.lua8
-rw-r--r--tex/context/base/font-pre.mkiv12
-rw-r--r--tex/context/base/l-lpeg.lua36
-rw-r--r--tex/context/base/publ-aut.lua78
-rw-r--r--tex/context/base/publ-imp-author.mkvi29
-rw-r--r--tex/context/base/publ-ini.mkiv92
-rw-r--r--tex/context/base/regi-ini.lua2
-rw-r--r--tex/context/base/sort-ini.lua81
-rw-r--r--tex/context/base/status-files.pdfbin24940 -> 24903 bytes
-rw-r--r--tex/context/base/status-lua.pdfbin249536 -> 249609 bytes
-rw-r--r--tex/context/base/x-asciimath.lua4
-rw-r--r--tex/generic/context/luatex/luatex-fonts-merged.lua37
23 files changed, 1004 insertions, 762 deletions
diff --git a/tex/context/base/char-def.lua b/tex/context/base/char-def.lua
index 0e1d8778e..f30e82898 100644
--- a/tex/context/base/char-def.lua
+++ b/tex/context/base/char-def.lua
@@ -2389,6 +2389,7 @@ characters.data={
direction="l",
linebreak="al",
uccode={ 0x53, 0x53 },
+ shcode={ 0x73, 0x73 },
unicodeslot=0xDF,
},
{
@@ -214783,4 +214784,4 @@ characters.data={
linebreak="cm",
unicodeslot=0xE01EF,
},
-} \ No newline at end of file
+}
diff --git a/tex/context/base/char-enc.lua b/tex/context/base/char-enc.lua
index 048837eec..c2061891a 100644
--- a/tex/context/base/char-enc.lua
+++ b/tex/context/base/char-enc.lua
@@ -9,6 +9,8 @@ if not modules then modules = { } end modules ['char-enc'] = {
-- Thanks to tex4ht for these mappings.
+local next = next
+
local allocate, setinitializer = utilities.storage.allocate, utilities.storage.setinitializer
characters = characters or { }
@@ -169,7 +171,10 @@ characters.synonyms = allocate { -- afm mess
-- that table.print would not work on this file unless it is accessed once. This
-- why the serializer does a dummy access.
-local enccodes = allocate() characters.enccodes = enccodes
+local enccodes = allocate()
+characters.enccodes = enccodes
+
+ -- maybe omit context name -> then same as encodings.make_unicode_vector
local function initialize()
for unicode, data in next, characters.data do
@@ -179,7 +184,9 @@ local function initialize()
end
end
for name, unicode in next, characters.synonyms do
- if not enccodes[name] then enccodes[name] = unicode end
+ if not enccodes[name] then
+ enccodes[name] = unicode
+ end
end
end
diff --git a/tex/context/base/char-fio.lua b/tex/context/base/char-fio.lua
new file mode 100644
index 000000000..766ea7123
--- /dev/null
+++ b/tex/context/base/char-fio.lua
@@ -0,0 +1,56 @@
+if not modules then modules = { } end modules ['char-fio'] = {
+ version = 1.001,
+ comment = "companion to char-ini.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- --
+
+local sequencers = utilities.sequencers
+local appendaction = sequencers.appendaction
+local enableaction = sequencers.enableaction
+local disableaction = sequencers.disableaction
+
+local utffilters = characters.filters.utf
+
+local textfileactions = resolvers.openers.helpers.textfileactions
+local textlineactions = resolvers.openers.helpers.textlineactions
+
+appendaction (textfileactions,"system","characters.filters.utf.reorder")
+disableaction(textfileactions, "characters.filters.utf.reorder")
+
+appendaction (textlineactions,"system","characters.filters.utf.reorder")
+disableaction(textlineactions, "characters.filters.utf.reorder")
+
+appendaction (textfileactions,"system","characters.filters.utf.collapse")
+disableaction(textfileactions, "characters.filters.utf.collapse")
+
+appendaction (textfileactions,"system","characters.filters.utf.decompose")
+disableaction(textfileactions, "characters.filters.utf.decompose")
+
+function characters.filters.utf.enable()
+ enableaction(textfileactions,"characters.filters.utf.reorder")
+ enableaction(textfileactions,"characters.filters.utf.collapse")
+ enableaction(textfileactions,"characters.filters.utf.decompose")
+end
+
+local function configure(what,v)
+ if not v then
+ disableaction(textfileactions,what)
+ disableaction(textlineactions,what)
+ elseif v == "line" then
+ disableaction(textfileactions,what)
+ enableaction (textlineactions,what)
+ else -- true or text
+ enableaction (textfileactions,what)
+ disableaction(textlineactions,what)
+ end
+end
+
+directives.register("filters.utf.reorder", function(v) configure("characters.filters.utf.reorder", v) end)
+directives.register("filters.utf.collapse", function(v) configure("characters.filters.utf.collapse", v) end)
+directives.register("filters.utf.decompose", function(v) configure("characters.filters.utf.decompose",v) end)
+
+utffilters.setskippable { "mkiv", "mkvi", "mkix", "mkxi" }
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua
index eb73cc19e..a2505c0eb 100644
--- a/tex/context/base/char-ini.lua
+++ b/tex/context/base/char-ini.lua
@@ -7,26 +7,33 @@ if not modules then modules = { } end modules ['char-ini'] = {
}
-- todo: make two files, one for format generation, one for format use
+-- todo: move some to char-utf
-- we can remove the tag range starting at 0xE0000 (special applications)
local utfchar, utfbyte, utfvalues, ustring, utotable = utf.char, utf.byte, utf.values, utf.ustring, utf.totable
local concat, unpack, tohash = table.concat, table.unpack, table.tohash
local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset
-local format, lower, gsub, match, gmatch = string.format, string.lower, string.gsub, string.match, string.match, string.gmatch
-local P, R, Cs, lpegmatch, patterns = lpeg.P, lpeg.R, lpeg.Cs, lpeg.match, lpeg.patterns
+local format, lower, gsub = string.format, string.lower, string.gsub
+local P, R, S, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cs
-local utf8byte = patterns.utf8byte
-local utf8char = patterns.utf8char
+if not characters then require("char-def") end
-local allocate = utilities.storage.allocate
-local mark = utilities.storage.mark
+local lpegpatterns = lpeg.patterns
+local lpegmatch = lpeg.match
+local utf8byte = lpegpatterns.utf8byte
+local utf8char = lpegpatterns.utf8char
-local setmetatableindex = table.setmetatableindex
+local utfchartabletopattern = lpeg.utfchartabletopattern
-local trace_defining = false trackers.register("characters.defining", function(v) characters_defining = v end)
+local allocate = utilities.storage.allocate
+local mark = utilities.storage.mark
-local report_defining = logs.reporter("characters")
+local setmetatableindex = table.setmetatableindex
+
+local trace_defining = false trackers.register("characters.defining", function(v) characters_defining = v end)
+
+local report_defining = logs.reporter("characters")
--[[ldx--
<p>This module implements some methods and creates additional datastructured
@@ -60,7 +67,7 @@ end
local pattern = (P("0x") + P("U+")) * ((R("09","AF")^1 * P(-1)) / function(s) return tonumber(s,16) end)
-patterns.chartonumber = pattern
+lpegpatterns.chartonumber = pattern
local function chartonumber(k)
if type(k) == "string" then
@@ -420,13 +427,15 @@ setmetatableindex(otfscripts,function(t,unicode)
return "dflt"
end)
+local splitter = lpeg.splitat(S(":-"))
+
function characters.getrange(name) -- used in font fallback definitions (name or range)
local range = blocks[name]
if range then
return range.first, range.last, range.description, range.gaps
end
name = gsub(name,'"',"0x") -- goodie: tex hex notation
- local start, stop = match(name,"^(.-)[%-%:](.-)$")
+ local start, stop = lpegmatch(splitter,name)
if start and stop then
start, stop = tonumber(start,16) or tonumber(start), tonumber(stop,16) or tonumber(stop)
if start and stop then
@@ -870,17 +879,92 @@ end
----- toupper = Cs((utf8byte/ucchars)^0)
----- toshape = Cs((utf8byte/shchars)^0)
-local tolower = Cs((utf8char/lcchars)^0)
-local toupper = Cs((utf8char/ucchars)^0)
-local toshape = Cs((utf8char/shchars)^0)
-
-patterns.tolower = tolower
-patterns.toupper = toupper
-patterns.toshape = toshape
+local tolower = Cs((utf8char/lcchars)^0) -- no need to check spacing
+local toupper = Cs((utf8char/ucchars)^0) -- no need to check spacing
+local toshape = Cs((utf8char/shchars)^0) -- no need to check spacing
+
+lpegpatterns.tolower = tolower
+lpegpatterns.toupper = toupper
+lpegpatterns.toshape = toshape
+
+-- function characters.lower (str) return lpegmatch(tolower,str) end
+-- function characters.upper (str) return lpegmatch(toupper,str) end
+-- function characters.shaped(str) return lpegmatch(toshape,str) end
+
+local lhash = { }
+local uhash = { }
+local shash = { }
+
+for k, v in next, characters.data do
+ -- if k < 0x11000 then
+ local l = v.lccode
+ if l then
+ if type(l) == "number" then
+ lhash[utfchar(k)] = utfchar(l)
+ elseif #l == 2 then
+ lhash[utfchar(k)] = utfchar(l[1]) .. utfchar(l[2])
+ else
+ inspect(v)
+ end
+ else
+ local u = v.uccode
+ if u then
+ if type(u) == "number" then
+ uhash[utfchar(k)] = utfchar(u)
+ elseif #u == 2 then
+ uhash[utfchar(k)] = utfchar(u[1]) .. utfchar(u[2])
+ else
+ inspect(v)
+ end
+ end
+ end
+ local s = v.shcode
+ if s then
+ if type(s) == "number" then
+ shash[utfchar(k)] = utfchar(s)
+ elseif #s == 2 then
+ shash[utfchar(k)] = utfchar(s[1]) .. utfchar(s[2])
+ else
+ inspect(v)
+ end
+ end
+ -- end
+end
-function characters.lower (str) return lpegmatch(tolower,str) end
-function characters.upper (str) return lpegmatch(toupper,str) end
-function characters.shaped(str) return lpegmatch(toshape,str) end
+local utf8lower = Cs((utfchartabletopattern(lhash) / lhash + utf8char)^0)
+local utf8upper = Cs((utfchartabletopattern(uhash) / uhash + utf8char)^0)
+local utf8shape = Cs((utfchartabletopattern(shash) / shash + utf8char)^0)
+
+lpegpatterns.utf8lower = utf8lower
+lpegpatterns.utf8upper = utf8upper
+lpegpatterns.utf8shape = utf8shape
+
+function characters.lower (str) return lpegmatch(utf8lower,str) end
+function characters.upper (str) return lpegmatch(utf8upper,str) end
+function characters.shaped(str) return lpegmatch(utf8shape,str) end
+
+-- local str = [[
+-- ÀÁÂÃÄÅàáâãäå àáâãäåàáâãäå ÀÁÂÃÄÅÀÁÂÃÄÅ AAAAAAaaaaaa
+-- ÆÇæç æçæç ÆÇÆÇ AECaec
+-- ÈÉÊËèéêë èéêëèéêë ÈÉÊËÈÉÊË EEEEeeee
+-- ÌÍÎÏÞìíîïþ ìíîïþìíîïþ ÌÍÎÏÞÌÍÎÏÞ IIIIÞiiiiþ
+-- Ðð ðð ÐÐ Ðð
+-- Ññ ññ ÑÑ Nn
+-- ÒÓÔÕÖòóôõö òóôõöòóôõö ÒÓÔÕÖÒÓÔÕÖ OOOOOooooo
+-- Øø øø ØØ Oo
+-- ÙÚÛÜùúûü ùúûüùúûü ÙÚÛÜÙÚÛÜ UUUUuuuu
+-- Ýýÿ ýýÿ ÝÝŸ Yyy
+-- ß ß SS ss
+-- Ţţ ţţ ŢŢ Tt
+-- ]]
+--
+-- local lower = characters.lower print(lower(str))
+-- local upper = characters.upper print(upper(str))
+-- local shaped = characters.shaped print(shaped(str))
+--
+-- local c, n = os.clock(), 10000
+-- for i=1,n do lower(str) upper(str) shaped(str) end -- 2.08 => 0.77
+-- print(os.clock()-c,n*#str*3)
-- maybe: (twice as fast when much ascii)
--
@@ -929,15 +1013,6 @@ end
function characters.uccode(n) return uccodes[n] end -- obsolete
function characters.lccode(n) return lccodes[n] end -- obsolete
-function characters.safechar(n)
- local c = data[n]
- if c and c.contextname then
- return "\\" .. c.contextname
- else
- return utfchar(n)
- end
-end
-
function characters.shape(n)
local shcode = shcodes[n]
if not shcode then
@@ -992,36 +1067,36 @@ end
-- groupdata[group] = gdata
-- end
---~ characters.data, characters.groups = chardata, groupdata
-
---~ [0xF0000]={
---~ category="co",
---~ cjkwd="a",
---~ description="<Plane 0x000F Private Use, First>",
---~ direction="l",
---~ unicodeslot=0xF0000,
---~ },
---~ [0xFFFFD]={
---~ category="co",
---~ cjkwd="a",
---~ description="<Plane 0x000F Private Use, Last>",
---~ direction="l",
---~ unicodeslot=0xFFFFD,
---~ },
---~ [0x100000]={
---~ category="co",
---~ cjkwd="a",
---~ description="<Plane 0x0010 Private Use, First>",
---~ direction="l",
---~ unicodeslot=0x100000,
---~ },
---~ [0x10FFFD]={
---~ category="co",
---~ cjkwd="a",
---~ description="<Plane 0x0010 Private Use, Last>",
---~ direction="l",
---~ unicodeslot=0x10FFFD,
---~ },
+-- characters.data, characters.groups = chardata, groupdata
+
+-- [0xF0000]={
+-- category="co",
+-- cjkwd="a",
+-- description="<Plane 0x000F Private Use, First>",
+-- direction="l",
+-- unicodeslot=0xF0000,
+-- },
+-- [0xFFFFD]={
+-- category="co",
+-- cjkwd="a",
+-- description="<Plane 0x000F Private Use, Last>",
+-- direction="l",
+-- unicodeslot=0xFFFFD,
+-- },
+-- [0x100000]={
+-- category="co",
+-- cjkwd="a",
+-- description="<Plane 0x0010 Private Use, First>",
+-- direction="l",
+-- unicodeslot=0x100000,
+-- },
+-- [0x10FFFD]={
+-- category="co",
+-- cjkwd="a",
+-- description="<Plane 0x0010 Private Use, Last>",
+-- direction="l",
+-- unicodeslot=0x10FFFD,
+-- },
if not characters.superscripts then
@@ -1078,259 +1153,6 @@ function characters.showstring(str)
end
end
--- the following code will move to char-tex.lua
-
--- tex
-
-if not tex or not context or not commands then return characters end
-
-local tex = tex
-local texsetlccode = tex.setlccode
-local texsetuccode = tex.setuccode
-local texsetsfcode = tex.setsfcode
-local texsetcatcode = tex.setcatcode
-
-local contextsprint = context.sprint
-local ctxcatcodes = catcodes.numbers.ctxcatcodes
-
---[[ldx--
-<p>Instead of using a <l n='tex'/> file to define the named glyphs, we
-use the table. After all, we have this information available anyway.</p>
---ldx]]--
-
-function commands.makeactive(n,name) --
- contextsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name))
- -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
-end
-
-function commands.utfchar(c,n)
- if n then
- -- contextsprint(c,charfromnumber(n))
- contextsprint(c,utfchar(n))
- else
- -- contextsprint(charfromnumber(c))
- contextsprint(utfchar(c))
- end
-end
-
-function commands.safechar(n)
- local c = data[n]
- if c and c.contextname then
- contextsprint("\\" .. c.contextname) -- context[c.contextname]()
- else
- contextsprint(utfchar(n))
- end
-end
-
-tex.uprint = commands.utfchar
-
-local forbidden = tohash { -- at least now
- 0x00A0,
- 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D,
- 0x202F,
- 0x205F,
- -- 0xFEFF,
-}
-
-function characters.define(tobelettered, tobeactivated) -- catcodetables
-
- if trace_defining then
- report_defining("defining active character commands")
- end
-
- local activated, a = { }, 0
-
- for u, chr in next, data do -- these will be commands
- local fallback = chr.fallback
- if fallback then
- contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}")
- a = a + 1
- activated[a] = u
- else
- local contextname = chr.contextname
- if contextname then
- local category = chr.category
- if is_character[category] then
- if chr.unicodeslot < 128 then
- if is_letter[category] then
- contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s
- else
- contextsprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) -- has no s
- end
- else
- contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s
- end
- elseif is_command[category] and not forbidden[u] then
- contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}")
- a = a + 1
- activated[a] = u
- end
- end
- end
- end
-
- if tobelettered then -- shared
- local saved = tex.catcodetable
- for i=1,#tobelettered do
- tex.catcodetable = tobelettered[i]
- if trace_defining then
- report_defining("defining letters (global, shared)")
- end
- for u, chr in next, data do
- if not chr.fallback and is_letter[chr.category] and u >= 128 and u <= 65536 then
- texsetcatcode(u,11)
- end
- local range = chr.range
- if range then
- for i=1,range.first,range.last do -- tricky as not all are letters
- texsetcatcode(i,11)
- end
- end
- end
- texsetcatcode(0x200C,11) -- non-joiner
- texsetcatcode(0x200D,11) -- joiner
- for k, v in next, blocks do
- if v.catcode == "letter" then
- for i=v.first,v.last do
- texsetcatcode(i,11)
- end
- end
- end
- end
- tex.catcodetable = saved
- end
-
- local nofactivated = #tobeactivated
- if tobeactivated and nofactivated > 0 then
- for i=1,nofactivated do
- local u = activated[i]
- if u then
- report_defining("character %U is active in set %a, containing %a",u,data[u].description,tobeactivated)
- end
- end
- local saved = tex.catcodetable
- for i=1,#tobeactivated do
- local vector = tobeactivated[i]
- if trace_defining then
- report_defining("defining %a active characters in vector %a",nofactivated,vector)
- end
- tex.catcodetable = vector
- for i=1,nofactivated do
- local u = activated[i]
- if u then
- texsetcatcode(u,13)
- end
- end
- end
- tex.catcodetable = saved
- end
-
-end
-
---[[ldx--
-<p>Setting the lccodes is also done in a loop over the data table.</p>
---ldx]]--
-
-local sfmode = "unset" -- unset, traditional, normal
-
-function characters.setcodes()
- if trace_defining then
- report_defining("defining lc and uc codes")
- end
- local traditional = sfstate == "traditional" or sfstate == "unset"
- for code, chr in next, data do
- local cc = chr.category
- if is_letter[cc] then
- local range = chr.range
- if range then
- for i=range.first,range.last do
- texsetcatcode(i,11) -- letter
- texsetlccode(i,i,i) -- self self
- end
- else
- local lc, uc = chr.lccode, chr.uccode
- if not lc then
- chr.lccode, lc = code, code
- elseif type(lc) == "table" then
- lc = code
- end
- if not uc then
- chr.uccode, uc = code, code
- elseif type(uc) == "table" then
- uc = code
- end
- texsetcatcode(code,11) -- letter
- texsetlccode(code,lc,uc)
- if traditional and cc == "lu" then
- texsetsfcode(code,999)
- end
- end
- elseif is_mark[cc] then
- texsetlccode(code,code,code) -- for hyphenation
- end
- end
- if traditional then
- sfstate = "traditional"
- end
-end
-
--- If this is something that is not documentwide and used a lot, then we
--- need a more clever approach (trivial but not now).
-
-local function setuppersfcodes(v,n)
- if sfstate ~= "unset" then
- report_defining("setting uppercase sf codes to %a",n)
- for code, chr in next, data do
- if chr.category == "lu" then
- texsetsfcode(code,n)
- end
- end
- end
- sfstate = v
-end
-
-directives.register("characters.spaceafteruppercase",function(v)
- if v == "traditional" then
- setuppersfcodes(v,999)
- elseif v == "normal" then
- setuppersfcodes(v,1000)
- end
-end)
-
--- tex
-
-function commands.chardescription(slot)
- local d = data[slot]
- if d then
- context(d.description)
- end
-end
-
--- xml
-
-characters.activeoffset = 0x10000 -- there will be remapped in that byte range
-
-function commands.remapentity(chr,slot)
- contextsprint(format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr))
-end
-
--- xml.entities = xml.entities or { }
---
--- storage.register("xml/entities",xml.entities,"xml.entities") -- this will move to lxml
---
--- function characters.setmkiventities()
--- local entities = xml.entities
--- entities.lt = "<"
--- entities.amp = "&"
--- entities.gt = ">"
--- end
---
--- function characters.setmkiientities()
--- local entities = xml.entities
--- entities.lt = utfchar(characters.activeoffset + utfbyte("<"))
--- entities.amp = utfchar(characters.activeoffset + utfbyte("&"))
--- entities.gt = utfchar(characters.activeoffset + utfbyte(">"))
--- end
+-- code moved to char-tex.lua
-commands.definecatcodetable = characters.define
-commands.setcharactercodes = characters.setcodes
+return characters
diff --git a/tex/context/base/char-ini.mkiv b/tex/context/base/char-ini.mkiv
index db52ae723..4fb63d93e 100644
--- a/tex/context/base/char-ini.mkiv
+++ b/tex/context/base/char-ini.mkiv
@@ -13,9 +13,7 @@
\writestatus{loading}{ConTeXt Character Support / Initialization}
-\registerctxluafile{char-def}{1.001} % let's load this one first
-\registerctxluafile{char-ini}{1.001}
-\registerctxluafile{char-cjk}{1.001}
+\registerctxluafile{char-fio}{1.001}
\registerctxluafile{char-map}{1.001} % maybe we will load this someplace else
\registerctxluafile{char-tex}{1.001}
diff --git a/tex/context/base/char-tex.lua b/tex/context/base/char-tex.lua
index 472cae930..a9a760c7a 100644
--- a/tex/context/base/char-tex.lua
+++ b/tex/context/base/char-tex.lua
@@ -7,16 +7,130 @@ if not modules then modules = { } end modules ['char-tex'] = {
}
local lpeg = lpeg
+local context = context
+local commands = commands
-local find = string.find
+local next, type = next, type
+local format, find, gmatch = string.format, string.find, string.gmatch
+local utfchar, utfbyte = utf.char, utf.byte
+local concat, tohash = table.concat, table.tohash
local P, C, R, S, V, Cs, Cc = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs, lpeg.Cc
-local U, lpegmatch = lpeg.patterns.utf8, lpeg.match
-local allocate, mark = utilities.storage.allocate, utilities.storage.mark
+local lpegpatterns = lpeg.patterns
+local lpegmatch = lpeg.match
+local utf8byte = lpegpatterns.utf8byte
+local utf8char = lpegpatterns.utf8char
+local utfchartabletopattern = lpeg.utfchartabletopattern
-characters = characters or { }
-local characters = characters
-characters.tex = characters.tex or { }
+local allocate = utilities.storage.allocate
+local mark = utilities.storage.mark
+
+local characters = characters
+local texcharacters = { }
+characters.tex = texcharacters
+local utffilters = characters.filters.utf
+
+local is_character = characters.is_character
+local is_letter = characters.is_letter
+local is_command = characters.is_command
+local is_spacing = characters.is_spacing
+local is_mark = characters.is_mark
+local is_punctuation = characters.is_punctuation
+
+local data = characters.data if not data then return end
+local blocks = characters.blocks
+
+local trace_defining = false trackers.register("characters.defining", function(v) characters_defining = v end)
+
+local report_defining = logs.reporter("characters")
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+--[[ldx--
+<p>In order to deal with 8-bit output, we need to find a way to go from <l n='utf'/> to
+8-bit. This is handled in the <l n='luatex'/> engine itself.</p>
+
+<p>This leaves us problems with characters that are specific to <l n='tex'/> like
+<type>{}</type>, <type>$</type> and alike. We can remap some chars that tex input files
+are sensitive for to a private area (while writing to a utility file) and revert then
+to their original slot when we read in such a file. Instead of reverting, we can (when
+we resolve characters to glyphs) map them to their right glyph there. For this purpose
+we can use the private planes 0x0F0000 and 0x100000.</p>
+--ldx]]--
+
+local low = allocate()
+local high = allocate()
+local escapes = allocate()
+local special = "~#$%^&_{}\\|" -- "~#$%{}\\|"
+
+local private = {
+ low = low,
+ high = high,
+ escapes = escapes,
+}
+
+utffilters.private = private
+
+for ch in gmatch(special,".") do
+ local cb
+ if type(ch) == "number" then
+ cb, ch = ch, utfchar(ch)
+ else
+ cb = utfbyte(ch)
+ end
+ if cb < 256 then
+ escapes[ch] = "\\" .. ch
+ low[ch] = utfchar(0x0F0000 + cb)
+ if ch == "%" then
+ ch = "%%" -- nasty, but we need this as in replacements (also in lpeg) % is interpreted
+ end
+ high[utfchar(0x0F0000 + cb)] = ch
+ end
+end
+
+local tohigh = lpeg.replacer(low) -- frozen, only for basic tex
+local tolow = lpeg.replacer(high) -- frozen, only for basic tex
+
+lpegpatterns.utftohigh = tohigh
+lpegpatterns.utftolow = tolow
+
+function utffilters.harden(str)
+ return lpegmatch(tohigh,str)
+end
+
+function utffilters.soften(str)
+ return lpegmatch(tolow,str)
+end
+
+private.escape = utf.remapper(escapes)
+private.replace = utf.remapper(low)
+private.revert = utf.remapper(high)
+
+--[[ldx--
+<p>We get a more efficient variant of this when we integrate
+replacements in collapser. This more or less renders the previous
+private code redundant. The following code is equivalent but the
+first snippet uses the relocated dollars.</p>
+
+<typing>
+[󰀤x󰀤] [$x$]
+</typing>
+--ldx]]--
+
+-- using the tree-lpeg-mapper would be nice but we also need to deal with end-of-string
+-- cases: "\"\i" and don't want "\relax" to be seen as \r e lax" (for which we need to mess
+-- with spaces
local accentmapping = allocate {
['"'] = { [""] = "¨",
@@ -128,7 +242,7 @@ local accentmapping = allocate {
},
}
-characters.tex.accentmapping = accentmapping
+texcharacters.accentmapping = accentmapping
local accent_map = allocate { -- incomplete
['~'] = "̃" , -- ̃ Ẽ
@@ -150,7 +264,7 @@ local accent_map = allocate { -- incomplete
-- ̰ Ḛ
}
--- local accents = table.concat(table.keys(accentmapping)) -- was _map
+-- local accents = concat(table.keys(accentmapping)) -- was _map
local function remap_accent(a,c,braced)
local m = accentmapping[a]
@@ -171,7 +285,7 @@ local function remap_accent(a,c,braced)
end
end
-local command_map = allocate {
+local commandmapping = allocate {
["i"] = "ı",
["l"] = "ł",
["ss"] = "ß",
@@ -185,68 +299,125 @@ local command_map = allocate {
["AA"] = "Å",
}
--- no need for U here
-
-local achar = R("az","AZ") + P("ı") + P("\\i")
+texcharacters.commandmapping = commandmapping
-local spaces = P(" ")^0
-local no_l = P("{") / ""
-local no_r = P("}") / ""
-local no_b = P('\\') / ""
+-- local achar = R("az","AZ") + P("ı") + P("\\i")
+--
+-- local spaces = P(" ")^0
+-- local no_l = P("{") / ""
+-- local no_r = P("}") / ""
+-- local no_b = P('\\') / ""
+--
+-- local lUr = P("{") * C(achar) * P("}")
+--
+-- local accents_1 = [["'.=^`~]]
+-- local accents_2 = [[Hckruv]]
+--
+-- local accent = P('\\') * (
+-- C(S(accents_1)) * (lUr * Cc(true) + C(achar) * Cc(false)) + -- we need achar for ı etc, could be sped up
+-- C(S(accents_2)) * lUr * Cc(true)
+-- ) / remap_accent
+--
+-- local csname = P('\\') * C(R("az","AZ")^1)
+--
+-- local command = (
+-- csname +
+-- P("{") * csname * spaces * P("}")
+-- ) / commandmapping -- remap_commands
+--
+-- local both_1 = Cs { "run",
+-- accent = accent,
+-- command = command,
+-- run = (V("accent") + no_l * V("accent") * no_r + V("command") + P(1))^0,
+-- }
+--
+-- local both_2 = Cs { "run",
+-- accent = accent,
+-- command = command,
+-- run = (V("accent") + V("command") + no_l * ( V("accent") + V("command") ) * no_r + P(1))^0,
+-- }
+--
+-- function texcharacters.toutf(str,strip)
+-- if not find(str,"\\") then
+-- return str
+-- elseif strip then
+-- return lpegmatch(both_1,str)
+-- else
+-- return lpegmatch(both_2,str)
+-- end
+-- end
-local lUr = P("{") * C(achar) * P("}")
+local untex
-local accents_1 = [["'.=^`~]]
-local accents_2 = [[Hckruv]]
+local function toutfpattern()
+ if not untex then
+ local hash = { }
+ for k, v in next, accentmapping do
+ for kk, vv in next, v do
+ if (k >= "a" and k <= "z") or (k >= "A" and k <= "Z") then
+ hash[ "\\"..k.." "..kk ] = vv
+ hash["{\\"..k.." "..kk.."}"] = vv
+ else
+ hash["\\" ..k ..kk ] = vv
+ hash["{\\"..k ..kk.."}"] = vv
+ end
+ hash["\\" ..k.."{"..kk.."}" ] = vv
+ hash["{\\"..k.."{"..kk.."}}"] = vv
+ end
+ end
+ for k, v in next, commandmapping do
+ hash["\\"..k.." "] = v
+ hash["{\\"..k.."}"] = v
+ hash["{\\"..k.." }"] = v
+ end
+ untex = utfchartabletopattern(hash) / hash
+ end
+ return untex
+end
-local accent = P('\\') * (
- C(S(accents_1)) * (lUr * Cc(true) + C(achar) * Cc(false)) + -- we need achar for ı etc, could be sped up
- C(S(accents_2)) * lUr * Cc(true)
-) / remap_accent
+texcharacters.toutfpattern = toutfpattern
-local csname = P('\\') * C(R("az","AZ")^1)
+local pattern = nil
-local command = (
- csname +
- P("{") * csname * spaces * P("}")
-) / command_map -- remap_commands
+local function prepare()
+ pattern = Cs((toutfpattern() + P(1))^0)
+ return pattern
+end
-local both_1 = Cs { "run",
- accent = accent,
- command = command,
- run = (V("accent") + no_l * V("accent") * no_r + V("command") + P(1))^0,
-}
+function texcharacters.toutf(str,strip)
+ if str == "" then
+ return str
+ elseif not find(str,"\\") then
+ return str
+ -- elseif strip then
+ else
+ return lpegmatch(pattern or prepare(),str)
+ end
+end
-local both_2 = Cs { "run",
- accent = accent,
- command = command,
- run = (V("accent") + V("command") + no_l * ( V("accent") + V("command") ) * no_r + P(1))^0,
-}
+-- print(texcharacters.toutf([[\~{Z}]],true))
+-- print(texcharacters.toutf([[\'\i]],true))
+-- print(texcharacters.toutf([[\'{\i}]],true))
+-- print(texcharacters.toutf([[\"{e}]],true))
+-- print(texcharacters.toutf([[\" {e}]],true))
+-- print(texcharacters.toutf([[{\"{e}}]],true))
+-- print(texcharacters.toutf([[{\" {e}}]],true))
+-- print(texcharacters.toutf([[{\l}]],true))
+-- print(texcharacters.toutf([[{\l }]],true))
+-- print(texcharacters.toutf([[\v{r}]],true))
+-- print(texcharacters.toutf([[fo{\"o}{\ss}ar]],true))
+-- print(texcharacters.toutf([[H{\'a}n Th\^e\llap{\raise 0.5ex\hbox{\'{\relax}}} Th{\'a}nh]],true))
-function characters.tex.toutf(str,strip)
- if not find(str,"\\") then
- return str
- elseif strip then
- return lpegmatch(both_1,str)
+function texcharacters.safechar(n) -- was characters.safechar
+ local c = data[n]
+ if c and c.contextname then
+ return "\\" .. c.contextname
else
- return lpegmatch(both_2,str)
+ return utfchar(n)
end
end
--- print(characters.tex.toutf([[\~{Z}]],true))
--- print(characters.tex.toutf([[\'\i]],true))
--- print(characters.tex.toutf([[\'{\i}]],true))
--- print(characters.tex.toutf([[\"{e}]],true))
--- print(characters.tex.toutf([[\" {e}]],true))
--- print(characters.tex.toutf([[{\"{e}}]],true))
--- print(characters.tex.toutf([[{\" {e}}]],true))
--- print(characters.tex.toutf([[{\l}]],true))
--- print(characters.tex.toutf([[{\l }]],true))
--- print(characters.tex.toutf([[\v{r}]],true))
--- print(characters.tex.toutf([[fo{\"o}{\ss}ar]],true))
--- print(characters.tex.toutf([[H{\'a}n Th\^e\llap{\raise 0.5ex\hbox{\'{\relax}}} Th{\'a}nh]],true))
-
-function characters.tex.defineaccents()
+function texcharacters.defineaccents()
for accent, group in next, accentmapping do
context.dodefineaccentcommand(accent)
for character, mapping in next, group do
@@ -254,3 +425,256 @@ function characters.tex.defineaccents()
end
end
end
+
+-- all kinds of initializations
+
+local tex = tex
+local texsetlccode = tex.setlccode
+local texsetuccode = tex.setuccode
+local texsetsfcode = tex.setsfcode
+local texsetcatcode = tex.setcatcode
+
+local contextsprint = context.sprint
+local ctxcatcodes = catcodes.numbers.ctxcatcodes
+
+--[[ldx--
+<p>Instead of using a <l n='tex'/> file to define the named glyphs, we
+use the table. After all, we have this information available anyway.</p>
+--ldx]]--
+
+function commands.makeactive(n,name) --
+ contextsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name))
+ -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
+end
+
+function commands.utfchar(c,n)
+ if n then
+ -- contextsprint(c,charfromnumber(n))
+ contextsprint(c,utfchar(n))
+ else
+ -- contextsprint(charfromnumber(c))
+ contextsprint(utfchar(c))
+ end
+end
+
+function commands.safechar(n)
+ local c = data[n]
+ if c and c.contextname then
+ contextsprint("\\" .. c.contextname) -- context[c.contextname]()
+ else
+ contextsprint(utfchar(n))
+ end
+end
+
+tex.uprint = commands.utfchar
+
+local forbidden = tohash { -- at least now
+ 0x00A0,
+ 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D,
+ 0x202F,
+ 0x205F,
+ -- 0xFEFF,
+}
+
+function characters.define(tobelettered, tobeactivated) -- catcodetables
+
+ if trace_defining then
+ report_defining("defining active character commands")
+ end
+
+ local activated, a = { }, 0
+
+ for u, chr in next, data do -- these will be commands
+ local fallback = chr.fallback
+ if fallback then
+ contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}")
+ a = a + 1
+ activated[a] = u
+ else
+ local contextname = chr.contextname
+ if contextname then
+ local category = chr.category
+ if is_character[category] then
+ if chr.unicodeslot < 128 then
+ if is_letter[category] then
+ contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s
+ else
+ contextsprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) -- has no s
+ end
+ else
+ contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s
+ end
+ elseif is_command[category] and not forbidden[u] then
+ contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}")
+ a = a + 1
+ activated[a] = u
+ end
+ end
+ end
+ end
+
+ if tobelettered then -- shared
+ local saved = tex.catcodetable
+ for i=1,#tobelettered do
+ tex.catcodetable = tobelettered[i]
+ if trace_defining then
+ report_defining("defining letters (global, shared)")
+ end
+ for u, chr in next, data do
+ if not chr.fallback and is_letter[chr.category] and u >= 128 and u <= 65536 then
+ texsetcatcode(u,11)
+ end
+ local range = chr.range
+ if range then
+                    for i=range.first,range.last do -- tricky as not all are letters
+ texsetcatcode(i,11)
+ end
+ end
+ end
+ texsetcatcode(0x200C,11) -- non-joiner
+ texsetcatcode(0x200D,11) -- joiner
+ for k, v in next, blocks do
+ if v.catcode == "letter" then
+ for i=v.first,v.last do
+ texsetcatcode(i,11)
+ end
+ end
+ end
+ end
+ tex.catcodetable = saved
+ end
+
+    local nofactivated = tobeactivated and #tobeactivated or 0
+ if tobeactivated and nofactivated > 0 then
+ for i=1,nofactivated do
+ local u = activated[i]
+ if u then
+ report_defining("character %U is active in set %a, containing %a",u,data[u].description,tobeactivated)
+ end
+ end
+ local saved = tex.catcodetable
+ for i=1,#tobeactivated do
+ local vector = tobeactivated[i]
+ if trace_defining then
+ report_defining("defining %a active characters in vector %a",nofactivated,vector)
+ end
+ tex.catcodetable = vector
+ for i=1,nofactivated do
+ local u = activated[i]
+ if u then
+ texsetcatcode(u,13)
+ end
+ end
+ end
+ tex.catcodetable = saved
+ end
+
+end
+
+--[[ldx--
+<p>Setting the lccodes is also done in a loop over the data table.</p>
+--ldx]]--
+
+local sfmode = "unset" -- unset, traditional, normal
+
+function characters.setcodes()
+ if trace_defining then
+ report_defining("defining lc and uc codes")
+ end
+ local traditional = sfstate == "traditional" or sfstate == "unset"
+ for code, chr in next, data do
+ local cc = chr.category
+ if is_letter[cc] then
+ local range = chr.range
+ if range then
+ for i=range.first,range.last do
+ texsetcatcode(i,11) -- letter
+ texsetlccode(i,i,i) -- self self
+ end
+ else
+ local lc, uc = chr.lccode, chr.uccode
+ if not lc then
+ chr.lccode, lc = code, code
+ elseif type(lc) == "table" then
+ lc = code
+ end
+ if not uc then
+ chr.uccode, uc = code, code
+ elseif type(uc) == "table" then
+ uc = code
+ end
+ texsetcatcode(code,11) -- letter
+ texsetlccode(code,lc,uc)
+ if traditional and cc == "lu" then
+ texsetsfcode(code,999)
+ end
+ end
+ elseif is_mark[cc] then
+ texsetlccode(code,code,code) -- for hyphenation
+ end
+ end
+ if traditional then
+ sfstate = "traditional"
+ end
+end
+
+-- If this is something that is not documentwide and used a lot, then we
+-- need a more clever approach (trivial but not now).
+
+local function setuppersfcodes(v,n)
+ if sfstate ~= "unset" then
+ report_defining("setting uppercase sf codes to %a",n)
+ for code, chr in next, data do
+ if chr.category == "lu" then
+ texsetsfcode(code,n)
+ end
+ end
+ end
+ sfstate = v
+end
+
+directives.register("characters.spaceafteruppercase",function(v)
+ if v == "traditional" then
+ setuppersfcodes(v,999)
+ elseif v == "normal" then
+ setuppersfcodes(v,1000)
+ end
+end)
+
+-- tex
+
+function commands.chardescription(slot)
+ local d = data[slot]
+ if d then
+ context(d.description)
+ end
+end
+
+-- xml
+
+characters.activeoffset = 0x10000 -- there will be remapped in that byte range
+
+function commands.remapentity(chr,slot)
+ contextsprint(format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr))
+end
+
+-- xml.entities = xml.entities or { }
+--
+-- storage.register("xml/entities",xml.entities,"xml.entities") -- this will move to lxml
+--
+-- function characters.setmkiventities()
+-- local entities = xml.entities
+-- entities.lt = "<"
+-- entities.amp = "&"
+-- entities.gt = ">"
+-- end
+--
+-- function characters.setmkiientities()
+-- local entities = xml.entities
+-- entities.lt = utfchar(characters.activeoffset + utfbyte("<"))
+-- entities.amp = utfchar(characters.activeoffset + utfbyte("&"))
+-- entities.gt = utfchar(characters.activeoffset + utfbyte(">"))
+-- end
+
+commands.definecatcodetable = characters.define
+commands.setcharactercodes = characters.setcodes
diff --git a/tex/context/base/char-utf.lua b/tex/context/base/char-utf.lua
index 98a780dcd..fcd300f6b 100644
--- a/tex/context/base/char-utf.lua
+++ b/tex/context/base/char-utf.lua
@@ -6,11 +6,6 @@ if not modules then modules = { } end modules ['char-utf'] = {
license = "see context related readme files"
}
--- todo: trackers
--- todo: no longer special characters (high) here, only needed in special cases and
--- these don't go through this file anyway
--- graphemes: basic symbols
-
--[[ldx--
<p>When a sequence of <l n='utf'/> characters enters the application, it may be
neccessary to collapse subsequences into their composed variant.</p>
@@ -24,44 +19,46 @@ of output (for instance <l n='pdf'/>).</p>
over a string.</p>
--ldx]]--
-local gmatch, gsub, find = string.gmatch, string.gsub, string.find
+local gsub, find = string.gsub, string.find
local concat, sortedhash, keys, sort = table.concat, table.sortedhash, table.keys, table.sort
local utfchar, utfbyte, utfcharacters, utfvalues = utf.char, utf.byte, utf.characters, utf.values
-local allocate = utilities.storage.allocate
-local lpegmatch, lpegpatterns, P, Cs, Cmt, Ct = lpeg.match, lpeg.patterns, lpeg.P, lpeg.Cs, lpeg.Cmt, lpeg.Ct
+local P, Cs, Cmt, Ct = lpeg.P, lpeg.Cs, lpeg.Cmt, lpeg.Ct
+
+if not characters then require("char-def") end
+if not characters.blocks then require("char-ini") end
+local lpegmatch = lpeg.match
+local lpegpatterns = lpeg.patterns
local p_utf8character = lpegpatterns.utf8character
local utfchartabletopattern = lpeg.utfchartabletopattern
-if not characters then
- require("char-def")
-end
+local allocate = utilities.storage.allocate or function() return { } end
-local charfromnumber = characters.fromnumber
+local charfromnumber = characters.fromnumber
-characters = characters or { }
-local characters = characters
+characters = characters or { }
+local characters = characters
-local graphemes = allocate()
-characters.graphemes = graphemes
+local graphemes = allocate()
+characters.graphemes = graphemes
-local collapsed = allocate()
-characters.collapsed = collapsed
+local collapsed = allocate()
+characters.collapsed = collapsed
-local combined = allocate()
-characters.combined = combined
+local combined = allocate()
+characters.combined = combined
-local decomposed = allocate()
-characters.decomposed = decomposed
+local decomposed = allocate()
+characters.decomposed = decomposed
-local mathpairs = allocate()
-characters.mathpairs = mathpairs
+local mathpairs = allocate()
+characters.mathpairs = mathpairs
-local filters = allocate()
-characters.filters = filters
+local filters = allocate()
+characters.filters = filters
-local utffilters = { }
-characters.filters.utf = utffilters
+local utffilters = { }
+characters.filters.utf = utffilters
-- is characters.combined cached?
@@ -221,92 +218,28 @@ end
characters.initialize = initialize
--[[ldx--
-<p>In order to deal with 8-bit output, we need to find a way to go from <l n='utf'/> to
-8-bit. This is handled in the <l n='luatex'/> engine itself.</p>
-
-<p>This leaves us problems with characters that are specific to <l n='tex'/> like
-<type>{}</type>, <type>$</type> and alike. We can remap some chars that tex input files
-are sensitive for to a private area (while writing to a utility file) and revert then
-to their original slot when we read in such a file. Instead of reverting, we can (when
-we resolve characters to glyphs) map them to their right glyph there. For this purpose
-we can use the private planes 0x0F0000 and 0x100000.</p>
---ldx]]--
-
-local low = allocate()
-local high = allocate()
-local escapes = allocate()
-local special = "~#$%^&_{}\\|" -- "~#$%{}\\|"
-
-local private = {
- low = low,
- high = high,
- escapes = escapes,
-}
-
-utffilters.private = private
-
-local tohigh = lpeg.replacer(low) -- frozen, only for basic tex
-local tolow = lpeg.replacer(high) -- frozen, only for basic tex
-
-lpegpatterns.utftohigh = tohigh
-lpegpatterns.utftolow = tolow
-
-function utffilters.harden(str)
- return lpegmatch(tohigh,str)
-end
-
-function utffilters.soften(str)
- return lpegmatch(tolow,str)
-end
-
-local function set(ch)
- local cb
- if type(ch) == "number" then
- cb, ch = ch, utfchar(ch)
- else
- cb = utfbyte(ch)
- end
- if cb < 256 then
- escapes[ch] = "\\" .. ch
- low[ch] = utfchar(0x0F0000 + cb)
- if ch == "%" then
- ch = "%%" -- nasty, but we need this as in replacements (also in lpeg) % is interpreted
- end
- high[utfchar(0x0F0000 + cb)] = ch
- end
-end
-
-private.set = set
-
--- function private.escape (str) return gsub(str,"(.)", escapes) end
--- function private.replace(str) return utfgsub(str,"(.)", low ) end
--- function private.revert (str) return utfgsub(str,"(.)", high ) end
-
-private.escape = utf.remapper(escapes)
-private.replace = utf.remapper(low)
-private.revert = utf.remapper(high)
-
-for ch in gmatch(special,".") do set(ch) end
-
---[[ldx--
-<p>We get a more efficient variant of this when we integrate
-replacements in collapser. This more or less renders the previous
-private code redundant. The following code is equivalent but the
-first snippet uses the relocated dollars.</p>
-
-<typing>
-[󰀤x󰀤] [$x$]
-</typing>
-
<p>The next variant has lazy token collecting, on a 140 page mk.tex this saves
about .25 seconds, which is understandable because we have no graphemes and
not collecting tokens is not only faster but also saves garbage collecting.
</p>
--ldx]]--
-local skippable = table.tohash { "mkiv", "mkvi", "mkix", "mkxi" }
+local skippable = { }
local filesuffix = file.suffix
+function utffilters.setskippable(suffix,value)
+ if value == nil then
+ value = true
+ end
+ if type(suffix) == "table" then
+ for i=1,#suffix do
+ skippable[suffix[i]] = value
+ end
+ else
+ skippable[suffix] = value
+ end
+end
+
-- function utffilters.collapse(str,filename) -- we can make high a seperate pass (never needed with collapse)
-- if skippable[filesuffix(filename)] then
-- return str
@@ -406,7 +339,7 @@ local filesuffix = file.suffix
-- return concat(tokens) -- seldom called
-- end
-- elseif nstr > 0 then
--- return high[str] or str -- thsi will go from here
+-- return high[str] or str -- this will go from here
-- end
-- end
-- return str
@@ -420,7 +353,7 @@ local function prepare()
if initialize then
initialize()
end
- local tree = utfchartabletopattern(keys(collapsed))
+ local tree = utfchartabletopattern(collapsed)
p_collapse = Cs((tree/collapsed + p_utf8character)^0 * P(-1)) -- the P(1) is needed in order to accept non utf
end
@@ -487,7 +420,7 @@ end
-- if initialize then
-- initialize()
-- end
--- local tree = utfchartabletopattern(keys(decomposed))
+-- local tree = utfchartabletopattern(decomposed)
-- finder = lpeg.finder(tree,false,true)
-- replacer = lpeg.replacer(tree,decomposed,false,true)
-- end
@@ -503,11 +436,11 @@ local function prepare()
if initialize then
initialize()
end
- local tree = utfchartabletopattern(keys(decomposed))
+ local tree = utfchartabletopattern(decomposed)
p_decompose = Cs((tree/decomposed + p_utf8character)^0 * P(-1))
end
-function utffilters.decompose(str) -- 3 to 4 times faster than the above
+function utffilters.decompose(str,filename) -- 3 to 4 times faster than the above
if not p_decompose then
prepare()
end
@@ -619,12 +552,12 @@ local function prepare()
hash[utfchar(k)] = { utfchar(k), combining, 0 } -- slot 3 can be used in sort
end
end
- local e = utfchartabletopattern(keys(exceptions))
- local p = utfchartabletopattern(keys(hash))
+ local e = utfchartabletopattern(exceptions)
+ local p = utfchartabletopattern(hash)
p_reorder = Cs((e/exceptions + Cmt(Ct((p/hash)^2),swapper) + p_utf8character)^0) * P(-1)
end
-function utffilters.reorder(str)
+function utffilters.reorder(str,filename)
if not p_reorder then
prepare()
end
@@ -638,141 +571,6 @@ function utffilters.reorder(str)
return str
end
--- --
-
-local sequencers = utilities.sequencers
-
-if sequencers then
-
- local textfileactions = resolvers.openers.helpers.textfileactions
- local textlineactions = resolvers.openers.helpers.textlineactions
-
- sequencers.appendaction (textfileactions,"system","characters.filters.utf.reorder")
- sequencers.disableaction(textfileactions,"characters.filters.utf.reorder")
-
- sequencers.appendaction (textlineactions,"system","characters.filters.utf.reorder")
- sequencers.disableaction(textlineactions,"characters.filters.utf.reorder")
-
- sequencers.appendaction (textfileactions,"system","characters.filters.utf.collapse")
- sequencers.disableaction(textfileactions,"characters.filters.utf.collapse")
-
- sequencers.appendaction (textfileactions,"system","characters.filters.utf.decompose")
- sequencers.disableaction(textfileactions,"characters.filters.utf.decompose")
-
- function characters.filters.utf.enable()
- sequencers.enableaction(textfileactions,"characters.filters.utf.reorder")
- sequencers.enableaction(textfileactions,"characters.filters.utf.collapse")
- sequencers.enableaction(textfileactions,"characters.filters.utf.decompose")
- end
-
- local function configure(what,v)
- if not v then
- sequencers.disableaction(textfileactions,what)
- sequencers.disableaction(textlineactions,what)
- elseif v == "line" then
- sequencers.disableaction(textfileactions,what)
- sequencers.enableaction (textlineactions,what)
- else -- true or text
- sequencers.enableaction (textfileactions,what)
- sequencers.disableaction(textlineactions,what)
- end
- end
-
- directives.register("filters.utf.reorder", function(v)
- configure("characters.filters.utf.reorder",v)
- end)
-
- directives.register("filters.utf.collapse", function(v)
- configure("characters.filters.utf.collapse",v)
- end)
-
- directives.register("filters.utf.decompose", function(v)
- configure("characters.filters.utf.decompose",v)
- end)
-
-end
-
--- Faster when we deal with lots of data but somewhat complicated by the fact that we want to be
--- downward compatible .. so maybe some day I'll simplify it. We seldom have large quantities of
--- text.
-
--- local p_processed = nil -- so we can reset if needed
---
--- function utffilters.preprocess(str,filename)
--- if not p_processed then
--- if initialize then
--- initialize()
--- end
--- local merged = table.merged(collapsed,decomposed)
--- local tree = utfchartabletopattern(keys(merged))
--- p_processed = Cs((tree/merged + lpegpatterns.utf8char)^0 * P(-1)) -- the P(1) is needed in order to accept non utf
--- local tree = utfchartabletopattern(keys(collapsed))
--- p_collapse = Cs((tree/collapsed + lpegpatterns.utf8char)^0 * P(-1)) -- the P(1) is needed in order to accept non utf
--- local tree = utfchartabletopattern(keys(decomposed))
--- p_decompose = Cs((tree/decomposed + lpegpatterns.utf8char)^0 * P(-1)) -- the P(1) is needed in order to accept non utf
--- end
--- if not str or #str == "" or #str == 1 then
--- return str
--- elseif filename and skippable[filesuffix(filename)] then -- we could hash the collapsables or do a quicker test
--- return str
--- else
--- return lpegmatch(p_processed,str) or str
--- end
--- end
---
--- local sequencers = utilities.sequencers
---
--- if sequencers then
---
--- local textfileactions = resolvers.openers.helpers.textfileactions
---
--- local collapse, decompose = false, false
---
--- sequencers.appendaction (textfileactions,"system","characters.filters.utf.preprocess")
--- sequencers.disableaction(textfileactions,"characters.filters.utf.preprocess")
---
--- local function checkable()
--- if decompose then
--- if collapse then
--- sequencers.disableaction(textfileactions,"characters.filters.utf.collapse")
--- sequencers.disableaction(textfileactions,"characters.filters.utf.decompose")
--- sequencers.enableaction (textfileactions,"characters.filters.utf.preprocess")
--- else
--- sequencers.disableaction(textfileactions,"characters.filters.utf.collapse")
--- sequencers.enableaction (textfileactions,"characters.filters.utf.decompose")
--- sequencers.disableaction(textfileactions,"characters.filters.utf.preprocess")
--- end
--- else
--- if collapse then
--- sequencers.disableaction(textfileactions,"characters.filters.utf.collapse")
--- sequencers.disableaction(textfileactions,"characters.filters.utf.decompose")
--- sequencers.disableaction(textfileactions,"characters.filters.utf.preprocess")
--- else
--- sequencers.disableaction(textfileactions,"characters.filters.utf.collapse")
--- sequencers.disableaction(textfileactions,"characters.filters.utf.decompose")
--- sequencers.disableaction(textfileactions,"characters.filters.utf.preprocess")
--- end
--- end
--- end
---
--- function characters.filters.utf.enable()
--- collapse = true
--- decompose = true
--- checkable()
--- end
---
--- directives.register("filters.utf.collapse", function(v)
--- collapse = v
--- checkable()
--- end)
---
--- directives.register("filters.utf.decompose", function(v)
--- decompose = v
--- checkable()
--- end)
---
--- end
-
-- local collapse = utffilters.collapse
-- local decompose = utffilters.decompose
-- local preprocess = utffilters.preprocess
@@ -815,3 +613,5 @@ end
-- local done = utffilters.reorder(test)
--
-- print(test,done,test==done,false)
+
+return characters
diff --git a/tex/context/base/char-utf.mkiv b/tex/context/base/char-utf.mkiv
index 280e7ef6d..381360905 100644
--- a/tex/context/base/char-utf.mkiv
+++ b/tex/context/base/char-utf.mkiv
@@ -22,22 +22,15 @@
\unprotect
+\registerctxluafile{char-def}{1.001}
+\registerctxluafile{char-ini}{1.001}
\registerctxluafile{char-utf}{1.001}
+\registerctxluafile{char-cjk}{1.001}
%D We enable collapsing (combining characters) by default, but
%D since the source files are rather simple, we postpone the
%D initialization till runtime.
-% resolvers.filters.install('utf',characters.filters.utf.collapse)
-
-% \appendtoks
-% \ctxlua{
-% local textfileactions = resolvers.openers.helpers.textfileactions
-% utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.collapse")
-% utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.decompose")
-% }%
-% \to \everyjob
-
\appendtoks
\ctxlua{characters.filters.utf.enable()}%
\to \everyjob
diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv
index 22bda98b0..c9d8a19e0 100644
--- a/tex/context/base/cont-new.mkiv
+++ b/tex/context/base/cont-new.mkiv
@@ -11,7 +11,7 @@
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.
-\newcontextversion{2014.07.04 15:55}
+\newcontextversion{2014.07.06 21:17}
%D This file is loaded at runtime, thereby providing an excellent place for
%D hacks, patches, extensions and new features.
diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf
index bb3c1a555..3fddcdb4c 100644
--- a/tex/context/base/context-version.pdf
+++ b/tex/context/base/context-version.pdf
Binary files differ
diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv
index f92d65902..468493ce1 100644
--- a/tex/context/base/context.mkiv
+++ b/tex/context/base/context.mkiv
@@ -28,7 +28,7 @@
%D up and the dependencies are more consistent.
\edef\contextformat {\jobname}
-\edef\contextversion{2014.07.04 15:55}
+\edef\contextversion{2014.07.06 21:17}
\edef\contextkind {beta}
%D For those who want to use this:
@@ -112,9 +112,9 @@
\loadmarkfile{supp-dir}
-\loadmarkfile{char-ini}
-\loadmarkfile{char-utf}
-\loadmarkfile{char-act}
+\loadmarkfile{char-utf} % generic code (i.e. not much tex) ... could become unic-ini
+\loadmarkfile{char-ini} % tex / context specific
+\loadmarkfile{char-act} % even more specific
\loadmarkfile{mult-ini}
\loadmarkfile{mult-sys}
diff --git a/tex/context/base/font-enc.lua b/tex/context/base/font-enc.lua
index 5305f0736..2e8b722de 100644
--- a/tex/context/base/font-enc.lua
+++ b/tex/context/base/font-enc.lua
@@ -8,6 +8,7 @@ if not modules then modules = { } end modules ['font-enc'] = {
-- this module is obsolete
+local next = next
local match, gmatch, gsub = string.match, string.gmatch, string.gsub
local setmetatableindex = table.setmetatableindex
@@ -125,7 +126,12 @@ function encodings.make_unicode_vector()
end
end
for name, code in next, characters.synonyms do
- vector[code], hash[name] = name, code
+ if not vector[code] then
+ vector[code] = name
+ end
+ if not hash[name] then
+ hash[name] = code
+ end
end
return containers.write(encodings.cache, 'unicode', { name='unicode', tag='unicode', vector=vector, hash=hash })
end
diff --git a/tex/context/base/font-pre.mkiv b/tex/context/base/font-pre.mkiv
index fc6eb289e..cb5b193f6 100644
--- a/tex/context/base/font-pre.mkiv
+++ b/tex/context/base/font-pre.mkiv
@@ -100,14 +100,14 @@
features=no]
\definefontfeature
- [semetic-complete]
+ [semitic-complete]
[mode=node,analyze=yes,language=dflt,ccmp=yes,
init=yes,medi=yes,fina=yes,isol=yes,
mark=yes,mkmk=yes,kern=yes,curs=yes,
liga=yes,dlig=yes,rlig=yes,clig=yes,calt=yes]
\definefontfeature
- [semetic-simple]
+ [semitic-simple]
[mode=node,analyze=yes,language=dflt,ccmp=yes,
init=yes,medi=yes,fina=yes,isol=yes,
mark=yes,mkmk=yes,kern=yes,curs=yes,
@@ -115,22 +115,22 @@
\definefontfeature
[arabic]
- [semetic-complete]
+ [semitic-complete]
[script=arab]
\definefontfeature
[hebrew]
- [semetic-complete]
+ [semitic-complete]
[script=hebr]
\definefontfeature
[simplearabic]
- [semetic-simple]
+ [semitic-simple]
[script=arab]
\definefontfeature
[simplehebrew]
- [semetic-simple]
+ [semitic-simple]
[script=hebr]
% \definefont [DevaOne] [file:chandas.ttf*devanagari-one at 12pt]
diff --git a/tex/context/base/l-lpeg.lua b/tex/context/base/l-lpeg.lua
index c203d8044..79e75a7b7 100644
--- a/tex/context/base/l-lpeg.lua
+++ b/tex/context/base/l-lpeg.lua
@@ -897,17 +897,35 @@ end
function lpeg.utfchartabletopattern(list) -- goes to util-lpg
local tree = { }
local hash = { }
- for i=1,#list do
- local t = tree
- for c in gmatch(list[i],".") do
- local tc = t[c]
- if not tc then
- tc = { }
- t[c] = tc
+ local n = #list
+ if n == 0 then
+ -- we could always use this branch
+ for s in next, list do
+ local t = tree
+ for c in gmatch(s,".") do
+ local tc = t[c]
+ if not tc then
+ tc = { }
+ t[c] = tc
+ end
+ t = tc
+ end
+ hash[t] = s
+ end
+ else
+ for i=1,n do
+ local t = tree
+ local s = list[i]
+ for c in gmatch(s,".") do
+ local tc = t[c]
+ if not tc then
+ tc = { }
+ t[c] = tc
+ end
+ t = tc
end
- t = tc
+ hash[t] = s
end
- hash[t] = list[i]
end
return make(tree,hash)
end
diff --git a/tex/context/base/publ-aut.lua b/tex/context/base/publ-aut.lua
index b35af1bcc..0167d66e7 100644
--- a/tex/context/base/publ-aut.lua
+++ b/tex/context/base/publ-aut.lua
@@ -233,6 +233,7 @@ local function the_initials(initials,symbol)
end
local ctx_btxsetconcat = context.btxsetconcat
+local ctx_btxsetauthorindex = context.btxsetauthorindex
local ctx_btxsetoverflow = context.btxsetoverflow
local ctx_btxsetinitials = context.btxsetinitials
local ctx_btxsetfirstnames = context.btxsetfirstnames
@@ -248,6 +249,56 @@ local ctx_btxstopauthor = context.btxstopauthor
local concatstate = publications.concatstate
local f_invalid = formatters["<invalid %s: %s>"]
+local currentauthordata = nil
+local currentauthorsymbol = nil
+
+local manipulators = typesetters.manipulators
+local splitmanipulation = manipulators.splitspecification
+local applymanipulation = manipulators.applyspecification
+local manipulatormethods = manipulators.methods
+
+local function value(i,field)
+ if currentauthordata then
+ local entry = currentauthordata[i]
+ if entry then
+ local value = entry[field]
+ if value and #value > 0 then
+ return value
+ end
+ end
+ end
+end
+
+function commands.btx_a_i(i) local v = value(i,"initials") if v then context(concat(the_initials(v,currentauthorsymbol or "."))) end end
+function commands.btx_a_f(i) local v = value(i,"firstnames") if v then context(concat(v," ")) end end
+function commands.btx_a_j(i) local v = value(i,"juniors") if v then context(concat(v," ")) end end
+function commands.btx_a_s(i) local v = value(i,"surnames") if v then context(concat(v," ")) end end
+function commands.btx_a_v(i) local v = value(i,"vons") if v then context(concat(v," ")) end end
+
+function commands.btxauthorfield(i,field)
+ if currentauthordata then
+ local entry = currentauthordata[i]
+ if entry then
+ local manipulator, field = splitmanipulation(field)
+ local value = entry[field]
+ if not value or #value == 0 then
+ -- value, no need for message
+ elseif manipulator then
+ for i=1,#value do
+ if i > 1 then
+ context(" ") -- symbol ?
+ end
+ context(applymanipulation(manipulator,value) or value)
+ end
+ elseif field == "initials" then
+ context(concat(the_initials(value,currentauthorsymbol or ".")))
+ else
+ context(concat(value," "))
+ end
+ end
+ end
+end
+
function commands.btxauthor(dataset,tag,field,settings)
local ds = datasets[dataset]
if not ds then
@@ -279,30 +330,32 @@ function commands.btxauthor(dataset,tag,field,settings)
if max > etallimit and etaldisplay < max then
max = etaldisplay
end
+ currentauthordata = split
+ currentauthorsymbol = symbol
for i=1,max do
- ctx_btxstartauthor() -- i, max
+ ctx_btxstartauthor(i,max)
ctx_btxsetconcat(concatstate(i,max))
ctx_btxsetauthorvariant(combiner)
local author = split[i]
local initials = author.initials
- if initials then
- ctx_btxsetinitials(concat(the_initials(initials,symbol)," "))
+ if initials and #initials > 0 then
+ ctx_btxsetinitials() -- (concat(the_initials(initials,symbol)," "))
end
local firstnames = author.firstnames
- if firstnames then
- ctx_btxsetfirstnames(concat(firstnames," "))
+ if firstnames and #firstnames > 0 then
+ ctx_btxsetfirstnames() -- (concat(firstnames," "))
end
local vons = author.vons
- if vons then
- ctx_btxsetvons(concat(vons," "))
+ if vons and #vons > 0 then
+ ctx_btxsetvons() -- (concat(vons," "))
end
local surnames = author.surnames
- if surnames then
- ctx_btxsetsurnames(concat(surnames," "))
+ if surnames and #surnames > 0 then
+ ctx_btxsetsurnames() -- (concat(surnames," "))
end
local juniors = author.juniors
- if juniors then
- ctx_btxsetjuniors(concat(juniors," "))
+ if juniors and #juniors > 0 then
+ ctx_btxsetjuniors() -- (concat(juniors," "))
end
ctx_btxsetup(combiner)
ctx_btxstopauthor()
@@ -317,6 +370,7 @@ end
-- pays off.
local compare = sorters.comparers.basic -- (a,b)
+-- local compare = sorters.basicsorter -- (a,b)
local strip = sorters.strip
local splitter = sorters.splitters.utf
@@ -480,7 +534,7 @@ function authors.sorted(dataset,list,sorttype) -- experimental
if #valid == 0 or #valid ~= #list then
return list
else
- sorters.sort(valid,compare)
+ sorters.sort(valid,function(a,b) return a ~= b and compare(a,b) == -1 end)
for i=1,#valid do
valid[i] = valid[i].index
end
diff --git a/tex/context/base/publ-imp-author.mkvi b/tex/context/base/publ-imp-author.mkvi
index e21353f63..6326ac3d8 100644
--- a/tex/context/base/publ-imp-author.mkvi
+++ b/tex/context/base/publ-imp-author.mkvi
@@ -24,28 +24,13 @@
% You can adapt these setups to your liking, for instance as:
-% \startsetups btx:cite:author:normal
-% \fastsetup{btx:cite:author:concat}
-% \ifx\currentbtxfirstnames\empty \else
-% \begingroup
-% \bf
-% \currentbtxfirstnames
-% \endgroup
-% \btxcitevariantparameter{firstnamesep}
-% \fi
-% \ifx\currentbtxvons\empty \else
-% \currentbtxvons
-% \btxcitevariantparameter{vonsep}
-% \fi
-% \ifx\currentbtxsurnames\empty \else
-% \currentbtxsurnames
-% \ifx\currentbtxjuniors\empty \else
-% \btxcitevariantparameter{juniorsep}
-% \currentbtxjuniors
-% \fi
-% \fi
-% \fastsetup{btx:cite:author:etaltext}
-% \stopsetups
+% these can be used instead of the macros and they accept manipulator prefixes
+%
+% \currentbtxinitials : \btxauthorfield{initials}
+% \currentbtxfirstnames : \btxauthorfield{firstnames}
+% \currentbtxvons : \btxauthorfield{vons}
+% \currentbtxsurnames : \btxauthorfield{surnames}
+% \currentbtxjuniors : \btxauthorfield{juniors}
\startsetups \s!btx:\s!cite:\s!author:concat
\ifcase\currentbtxconcat \or \or
diff --git a/tex/context/base/publ-ini.mkiv b/tex/context/base/publ-ini.mkiv
index 5f8e335fe..bf8c29363 100644
--- a/tex/context/base/publ-ini.mkiv
+++ b/tex/context/base/publ-ini.mkiv
@@ -318,12 +318,14 @@
% \let\btxsetdataset\setbtxdataset
% \let\btxsetentry \setbtxentry
-\def\btxfield #1{\ctxcommand{btxfield("\currentbtxdataset","\currentbtxtag","#1")}}
-\def\btxdetail #1{\ctxcommand{btxdetail("\currentbtxdataset","\currentbtxtag","#1")}}
-\def\btxflush #1{\ctxcommand{btxflush("\currentbtxdataset","\currentbtxtag","#1")}}
-\def\btxdoifelse#1{\ctxcommand{btxdoifelse("\currentbtxdataset","\currentbtxtag","#1")}}
-\def\btxdoif #1{\ctxcommand{btxdoif("\currentbtxdataset","\currentbtxtag","#1")}}
-\def\btxdoifnot #1{\ctxcommand{btxdoifnot("\currentbtxdataset","\currentbtxtag","#1")}}
+\def\btxfield #1{\ctxcommand{btxfield("\currentbtxdataset","\currentbtxtag","#1")}}
+\def\btxdetail #1{\ctxcommand{btxdetail("\currentbtxdataset","\currentbtxtag","#1")}}
+\def\btxauthorfield#1{\ctxcommand{btxauthorfield(\number\currentbtxauthorindex,"#1")}}
+\def\btxflush #1{\ctxcommand{btxflush("\currentbtxdataset","\currentbtxtag","#1")}}
+\def\btxdoifelse #1{\ctxcommand{btxdoifelse("\currentbtxdataset","\currentbtxtag","#1")}}
+\def\btxdoif #1{\ctxcommand{btxdoif("\currentbtxdataset","\currentbtxtag","#1")}}
+\def\btxdoifnot #1{\ctxcommand{btxdoifnot("\currentbtxdataset","\currentbtxtag","#1")}}
+
\let\btxsetup\fastsetup
@@ -353,20 +355,41 @@
\let\currentbtxcombis \empty \unexpanded\def\btxsetcombis {\def\currentbtxcombis}
\let\currentbtxdataset \empty \unexpanded\def\btxsetdataset {\def\currentbtxdataset}
\let\currentbtxfirst \empty \unexpanded\def\btxsetfirst {\def\currentbtxfirst}
-\let\currentbtxfirstnames \empty \unexpanded\def\btxsetfirstnames {\def\currentbtxfirstnames}
-\let\currentbtxinitials \empty \unexpanded\def\btxsetinitials {\def\currentbtxinitials}
\let\currentbtxinternal \empty \unexpanded\def\btxsetinternal {\def\currentbtxinternal}
-\let\currentbtxjuniors \empty \unexpanded\def\btxsetjuniors {\def\currentbtxjuniors}
\let\currentbtxlanguage \empty \unexpanded\def\btxsetlanguage {\def\currentbtxlanguage}
\let\currentbtxsecond \empty \unexpanded\def\btxsetsecond {\def\currentbtxsecond}
-\let\currentbtxsurnames \empty \unexpanded\def\btxsetsurnames {\def\currentbtxsurnames}
\let\currentbtxtag \empty \unexpanded\def\btxsettag {\def\currentbtxtag}
-\let\currentbtxvons \empty \unexpanded\def\btxsetvons {\def\currentbtxvons}
\let\currentbtxauthorvariant\v!normal \unexpanded\def\btxsetauthorvariant{\def\currentbtxauthorvariant}
-\newconstant\currentbtxoverflow \unexpanded\def\btxsetoverflow#1{\currentbtxoverflow#1\relax}
-\newconstant\currentbtxconcat \unexpanded\def\btxsetconcat #1{\currentbtxconcat #1\relax}
-\newconstant\currentbtxcount \unexpanded\def\btxsetcount #1{\currentbtxcount #1\relax}
+%let\currentbtxfirstnames \empty \unexpanded\def\btxsetfirstnames {\def\currentbtxfirstnames}
+%let\currentbtxinitials \empty \unexpanded\def\btxsetinitials {\def\currentbtxinitials}
+%let\currentbtxjuniors \empty \unexpanded\def\btxsetjuniors {\def\currentbtxjuniors}
+%let\currentbtxsurnames \empty \unexpanded\def\btxsetsurnames {\def\currentbtxsurnames}
+%let\currentbtxvons \empty \unexpanded\def\btxsetvons {\def\currentbtxvons}
+
+%unexpanded\def\getcurrentbtxfirstnames{\ctxcommand{btxauthorfield("firstnames")}
+%unexpanded\def\getcurrentbtxinitials {\ctxcommand{btxauthorfield("initials")}
+%unexpanded\def\getcurrentbtxjuniors {\ctxcommand{btxauthorfield("juniors")}
+%unexpanded\def\getcurrentbtxsurnames {\ctxcommand{btxauthorfield("surnames")}
+%unexpanded\def\getcurrentbtxvons {\ctxcommand{btxauthorfield("vons")}
+
+\unexpanded\def\currentbtxfirstnames_indeed{\ctxcommand{btx_a_f(\number\currentbtxauthorindex)}}
+\unexpanded\def\currentbtxinitials_indeed {\ctxcommand{btx_a_i(\number\currentbtxauthorindex)}}
+\unexpanded\def\currentbtxjuniors_indeed {\ctxcommand{btx_a_j(\number\currentbtxauthorindex)}}
+\unexpanded\def\currentbtxsurnames_indeed {\ctxcommand{btx_a_s(\number\currentbtxauthorindex)}}
+\unexpanded\def\currentbtxvons_indeed {\ctxcommand{btx_a_v(\number\currentbtxauthorindex)}}
+
+\let\currentbtxfirstnames \empty \unexpanded\def\btxsetfirstnames{\let\currentbtxfirstnames\currentbtxfirstnames_indeed}
+\let\currentbtxinitials \empty \unexpanded\def\btxsetinitials {\let\currentbtxinitials \currentbtxinitials_indeed }
+\let\currentbtxjuniors \empty \unexpanded\def\btxsetjuniors {\let\currentbtxjuniors \currentbtxjuniors_indeed }
+\let\currentbtxsurnames \empty \unexpanded\def\btxsetsurnames {\let\currentbtxsurnames \currentbtxsurnames_indeed }
+\let\currentbtxvons \empty \unexpanded\def\btxsetvons {\let\currentbtxvons \currentbtxvons_indeed }
+
+\newconstant\currentbtxoverflow \unexpanded\def\btxsetoverflow #1{\currentbtxoverflow #1\relax}
+\newconstant\currentbtxconcat \unexpanded\def\btxsetconcat #1{\currentbtxconcat #1\relax}
+\newconstant\currentbtxcount \unexpanded\def\btxsetcount #1{\currentbtxcount #1\relax}
+\newconstant\currentbtxauthorindex %unexpanded\def\btxsetauthorindex#1{\currentbtxauthorindex#1\relax} % passed directly
+\newconstant\currentbtxauthorcount %unexpanded\def\btxsetauthorcount#1{\currentbtxauthorcount#1\relax} % passed directly
\def\currentbtxauthorvariant{normal}
@@ -381,17 +404,17 @@
\let\currentbtxdataset \empty}
\unexpanded\def\btxcitereset % check for less .. not all resets needed
- {\let \currentbtxfirst \empty
- \let \currentbtxsecond \empty
- \let \currentbtxinternal \empty
- \let \currentbtxbacklink \empty
- \let \currentbtxbacktrace\empty % not used here
- \let \currentbtxlanguage \empty
- \let \currentbtxdataset \empty
- \let \currentbtxtag \empty
- \setconstant\currentbtxoverflow \zerocount
- \setconstant\currentbtxconcat \zerocount
- \setconstant\currentbtxcount \zerocount}
+ {\let \currentbtxfirst \empty
+ \let \currentbtxsecond \empty
+ \let \currentbtxinternal \empty
+ \let \currentbtxbacklink \empty
+ \let \currentbtxbacktrace \empty % not used here
+ \let \currentbtxlanguage \empty
+ \let \currentbtxdataset \empty
+ \let \currentbtxtag \empty
+ \setconstant\currentbtxoverflow \zerocount
+ \setconstant\currentbtxconcat \zerocount
+ \setconstant\currentbtxcount \zerocount}
%D Tracing
@@ -701,8 +724,13 @@
})}%
\endgroup}
-\unexpanded\def\btxstartauthor{\begingroup}
-\unexpanded\def\btxstopauthor {\endgroup}
+\unexpanded\def\btxstartauthor#1#2%
+ {\begingroup
+ \currentbtxauthorindex#1\relax
+ \currentbtxauthorcount#2\relax}
+
+\unexpanded\def\btxstopauthor
+ {\endgroup}
\unexpanded\def\btxciteauthorsetup#1{\fastsetup{\s!btx:\s!cite:\s!author:#1}}
\unexpanded\def\btxlistauthorsetup#1{\fastsetup{\s!btx:\s!list:\s!author:#1}}
@@ -950,16 +978,6 @@
\unexpanded\def\btxcitesetup#1%
{\fastsetup{\s!btx:\s!cite:#1}} % no \btxcitereset as we loose dataset and such
-\unexpanded\def\btxsetfirst {\def\currentbtxfirst}
-\unexpanded\def\btxsetsecond {\def\currentbtxsecond}
-\unexpanded\def\btxsettag {\def\currentbtxtag}
-\unexpanded\def\btxsetdataset {\def\currentbtxdataset}
-%unexpanded\def\btxsetlanguage {\def\currentbtxlanguage}
-\unexpanded\def\btxsetinternal {\def\currentbtxinternal}
-\unexpanded\def\btxsetcount #1{\setconstant\currentbtxcount #1\relax}
-\unexpanded\def\btxsetconcat #1{\setconstant\currentbtxconcat #1\relax}
-\unexpanded\def\btxsetoverflow #1{\setconstant\currentbtxoverflow#1\relax}
-
\unexpanded\def\btxstartsubcite#1% #1 can go
{\begingroup
\btxcitereset % todo: limited set
diff --git a/tex/context/base/regi-ini.lua b/tex/context/base/regi-ini.lua
index 63f45a0b1..9484db7c7 100644
--- a/tex/context/base/regi-ini.lua
+++ b/tex/context/base/regi-ini.lua
@@ -390,7 +390,7 @@ function regimes.cleanup(regime,str)
mapping[split] = v
end
end
- p = Cs((lpeg.utfchartabletopattern(table.keys(mapping))/mapping+P(1))^0)
+ p = Cs((lpeg.utfchartabletopattern(mapping)/mapping+P(1))^0)
else
p = false
end
diff --git a/tex/context/base/sort-ini.lua b/tex/context/base/sort-ini.lua
index d1eaacd15..ab6ad0649 100644
--- a/tex/context/base/sort-ini.lua
+++ b/tex/context/base/sort-ini.lua
@@ -53,6 +53,7 @@ have language etc properties that then can be used.</p>
local gsub, rep, sub, sort, concat, tohash, format = string.gsub, string.rep, string.sub, table.sort, table.concat, table.tohash, string.format
local utfbyte, utfchar, utfcharacters, utfvalues = utf.byte, utf.char, utf.characters, utf.values
local next, type, tonumber, rawget, rawset = next, type, tonumber, rawget, rawset
+local P, Cs, R, S, lpegmatch = lpeg.P, lpeg.Cs, lpeg.R, lpeg.S, lpeg.match
local allocate = utilities.storage.allocate
local setmetatableindex = table.setmetatableindex
@@ -367,6 +368,8 @@ end
-- tricky: { 0, 0, 0 } vs { 0, 0, 0, 0 } => longer wins and mm, pm, zm can have them
+-- inlining and checking first slot first doesn't speed up (the 400K complex author sort)
+
local function basicsort(sort_a,sort_b)
if sort_a and sort_b then
local na = #sort_a
@@ -374,12 +377,14 @@ local function basicsort(sort_a,sort_b)
if na > nb then
na = nb
end
- for i=1,na do
- local ai, bi = sort_a[i], sort_b[i]
- if ai > bi then
- return 1
- elseif ai < bi then
- return -1
+ if na > 0 then
+ for i=1,na do
+ local ai, bi = sort_a[i], sort_b[i]
+ if ai > bi then
+ return 1
+ elseif ai < bi then
+ return -1
+ end
end
end
end
@@ -389,6 +394,10 @@ end
-- todo: compile compare function
local function basic(a,b) -- trace ea and eb
+ if a == b then
+ -- hashed (shared) entries
+ return 0
+ end
local ea, eb = a.split, b.split
local na, nb = #ea, #eb
if na == 0 and nb == 0 then
@@ -484,25 +493,59 @@ function sorters.basicsorter(a,b)
return basic(a,b) == -1
end
+-- local function numify(s)
+-- s = digitsoffset + tonumber(s) -- alternatively we can create range or maybe just hex numbers
+-- if s > digitsmaximum then
+-- s = digitsmaximum
+-- end
+-- return utfchar(s)
+-- end
+--
+-- function sorters.strip(str) -- todo: only letters and such
+-- if str and str ~= "" then
+-- -- todo: make a decent lpeg
+-- str = gsub(str,"\\[\"\'~^`]*","") -- \"e -- hm, too greedy
+-- str = gsub(str,"\\%S*","") -- the rest
+-- str = gsub(str,"%s","\001") -- can be option
+-- str = gsub(str,"[%s%[%](){}%$\"\']*","") -- %s already done
+-- if digits == v_numbers then
+-- str = gsub(str,"(%d+)",numify) -- sort numbers properly
+-- end
+-- return str
+-- else
+-- return ""
+-- end
+-- end
+
local function numify(s)
- s = digitsoffset + tonumber(s) -- alternatively we can create range
- if s > digitsmaximum then
- s = digitsmaximum
+ if digits == v_numbers then
+ return s
+ else
+ s = digitsoffset + tonumber(s) -- alternatively we can create range
+ if s > digitsmaximum then
+ s = digitsmaximum
+ end
+ return utfchar(s)
end
- return utfchar(s)
+end
+
+local pattern = nil
+
+local function prepare()
+ pattern = Cs( (
+ characters.tex.toutfpattern()
+ + lpeg.patterns.whitespace / "\000"
+ + (P("\\") * P(1) * R("az","AZ")^0) / ""
+ + S("[](){}$\"'") / ""
+ + R("09")^1 / numify
+ + P(1)
+ )^0 )
+ return pattern
end
function sorters.strip(str) -- todo: only letters and such
if str and str ~= "" then
- -- todo: make a decent lpeg
- str = gsub(str,"\\[\"\'~^`]*","") -- \"e -- hm, too greedy
- str = gsub(str,"\\%S*","") -- the rest
- str = gsub(str,"%s","\001") -- can be option
- str = gsub(str,"[%s%[%](){}%$\"\']*","") -- %s already done
- if digits == v_numbers then
- str = gsub(str,"(%d+)",numify) -- sort numbers properly
- end
- return str
+ return lpegmatch(pattern or prepare(),str)
else
return ""
end
diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf
index 5bfd7eade..233518f5c 100644
--- a/tex/context/base/status-files.pdf
+++ b/tex/context/base/status-files.pdf
Binary files differ
diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf
index 1da58153a..85f8ab47a 100644
--- a/tex/context/base/status-lua.pdf
+++ b/tex/context/base/status-lua.pdf
Binary files differ
diff --git a/tex/context/base/x-asciimath.lua b/tex/context/base/x-asciimath.lua
index b3202daa9..0849b42a5 100644
--- a/tex/context/base/x-asciimath.lua
+++ b/tex/context/base/x-asciimath.lua
@@ -829,9 +829,9 @@ local m_right = {
}
local p_left =
- lpeg.utfchartabletopattern(keys(m_left)) / m_left
+ lpeg.utfchartabletopattern(m_left) / m_left
local p_right =
- lpeg.utfchartabletopattern(keys(m_right)) / m_right
+ lpeg.utfchartabletopattern(m_right) / m_right
-- special cases
diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua
index 52a65ea57..22dd8c32b 100644
--- a/tex/generic/context/luatex/luatex-fonts-merged.lua
+++ b/tex/generic/context/luatex/luatex-fonts-merged.lua
@@ -1,6 +1,6 @@
-- merged file : luatex-fonts-merged.lua
-- parent file : luatex-fonts.lua
--- merge date : 07/04/14 15:55:31
+-- merge date : 07/06/14 21:17:47
do -- begin closure to overcome local limits and interference
@@ -665,17 +665,34 @@ end
function lpeg.utfchartabletopattern(list)
local tree={}
local hash={}
- for i=1,#list do
- local t=tree
- for c in gmatch(list[i],".") do
- local tc=t[c]
- if not tc then
- tc={}
- t[c]=tc
+ local n=#list
+ if n==0 then
+ for s in next,list do
+ local t=tree
+ for c in gmatch(s,".") do
+ local tc=t[c]
+ if not tc then
+ tc={}
+ t[c]=tc
+ end
+ t=tc
+ end
+ hash[t]=s
+ end
+ else
+ for i=1,n do
+ local t=tree
+ local s=list[i]
+ for c in gmatch(s,".") do
+ local tc=t[c]
+ if not tc then
+ tc={}
+ t[c]=tc
+ end
+ t=tc
end
- t=tc
+ hash[t]=s
end
- hash[t]=list[i]
end
return make(tree,hash)
end