summaryrefslogtreecommitdiff
path: root/tex/context/base/char-ini.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/char-ini.lua')
-rw-r--r--tex/context/base/char-ini.lua493
1 files changed, 246 insertions, 247 deletions
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua
index 277967ef3..9f8a0ce11 100644
--- a/tex/context/base/char-ini.lua
+++ b/tex/context/base/char-ini.lua
@@ -10,8 +10,6 @@ if not modules then modules = { } end modules ['char-ini'] = {
-- we can remove the tag range starting at 0xE0000 (special applications)
-local tex = tex
-
local utfchar, utfbyte, utfvalues, ustring = utf.char, utf.byte, utf.values, utf.ustring
local concat, unpack, tohash = table.concat, table.unpack, table.tohash
local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset
@@ -23,14 +21,6 @@ local utf8char = patterns.utf8char
local allocate = utilities.storage.allocate
local mark = utilities.storage.mark
-local texsetlccode = tex.setlccode
-local texsetuccode = tex.setuccode
-local texsetsfcode = tex.setsfcode
-local texsetcatcode = tex.setcatcode
-
-local contextsprint = context.sprint
-local ctxcatcodes = catcodes.numbers.ctxcatcodes
-local texcatcodes = catcodes.numbers.texcatcodes
local setmetatableindex = table.setmetatableindex
@@ -318,7 +308,7 @@ end)
local otfscripts = utilities.storage.allocate()
characters.otfscripts = otfscripts
-table.setmetatableindex(otfscripts,function(t,unicode)
+setmetatableindex(otfscripts,function(t,unicode)
for k, v in next, blocks do
local first, last = v.first, v.last
if unicode >= first and unicode <= last then
@@ -493,7 +483,9 @@ if not characters.fallbacks then
end
-storage.register("characters/fallbacks", characters.fallbacks, "characters.fallbacks") -- accents and such
+if storage then
+ storage.register("characters/fallbacks", characters.fallbacks, "characters.fallbacks") -- accents and such
+end
characters.directions = { }
@@ -511,206 +503,10 @@ setmetatableindex(characters.directions,function(t,k)
end)
--[[ldx--
-<p>The <type>context</type> namespace is used to store methods and data
-which is rather specific to <l n='context'/>.</p>
---ldx]]--
-
---[[ldx--
-<p>Instead of using a <l n='tex'/> file to define the named glyphs, we
-use the table. After all, we have this information available anyway.</p>
---ldx]]--
-
-function characters.makeactive(n,name) --
- contextsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name))
- -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
-end
-
-function tex.uprint(c,n)
- if n then
- -- contextsprint(c,charfromnumber(n))
- contextsprint(c,utfchar(n))
- else
- -- contextsprint(charfromnumber(c))
- contextsprint(utfchar(c))
- end
-end
-
-local forbidden = tohash { -- at least now
- 0x00A0,
- 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D,
- 0x202F,
- 0x205F,
- -- 0xFEFF,
-}
-
-function characters.define(tobelettered, tobeactivated) -- catcodetables
-
- if trace_defining then
- report_defining("defining active character commands")
- end
-
- local activated, a = { }, 0
-
- for u, chr in next, data do -- these will be commands
- local fallback = chr.fallback
- if fallback then
- contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}")
- a = a + 1
- activated[a] = u
- else
- local contextname = chr.contextname
- if contextname then
- local category = chr.category
- if is_character[category] then
- if chr.unicodeslot < 128 then
- if is_letter[category] then
- contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s
- else
- contextsprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) -- has no s
- end
- else
- contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s
- end
- elseif is_command[category] and not forbidden[u] then
- contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}")
- a = a + 1
- activated[a] = u
- end
- end
- end
- end
-
- if tobelettered then -- shared
- local saved = tex.catcodetable
- for i=1,#tobelettered do
- tex.catcodetable = tobelettered[i]
- if trace_defining then
- report_defining("defining letters (global, shared)")
- end
- for u, chr in next, data do
- if not chr.fallback and is_letter[chr.category] and u >= 128 and u <= 65536 then
- texsetcatcode(u,11)
- end
- local range = chr.range
- if range then
- for i=1,range.first,range.last do
- texsetcatcode(i,11)
- end
- end
- end
- texsetcatcode(0x200C,11) -- non-joiner
- texsetcatcode(0x200D,11) -- joiner
- end
- tex.catcodetable = saved
- end
-
- local nofactivated = #tobeactivated
- if tobeactivated and nofactivated > 0 then
- for i=1,nofactivated do
- local u = activated[i]
- if u then
- report_defining("character 0x%05X is active in sets %s (%s)",u,concat(tobeactivated,","),data[u].description)
- end
- end
- local saved = tex.catcodetable
- for i=1,#tobeactivated do
- local vector = tobeactivated[i]
- if trace_defining then
- report_defining("defining %s active characters in vector %s",nofactivated,vector)
- end
- tex.catcodetable = vector
- for i=1,nofactivated do
- local u = activated[i]
- if u then
- texsetcatcode(u,13)
- end
- end
- end
- tex.catcodetable = saved
- end
-
-end
-
---[[ldx--
-<p>Setting the lccodes is also done in a loop over the data table.</p>
---ldx]]--
-
-local sfmode = "unset" -- unset, traditional, normal
-
-function characters.setcodes()
- if trace_defining then
- report_defining("defining lc and uc codes")
- end
- local traditional = sfstate == "traditional" or sfstate == "unset"
- for code, chr in next, data do
- local cc = chr.category
- if is_letter[cc] then
- local range = chr.range
- if range then
- for i=range.first,range.last do
- texsetcatcode(i,11) -- letter
- texsetlccode(i,i,i) -- self self
- end
- else
- local lc, uc = chr.lccode, chr.uccode
- if not lc then
- chr.lccode, lc = code, code
- elseif type(lc) == "table" then
- lc = code
- end
- if not uc then
- chr.uccode, uc = code, code
- elseif type(uc) == "table" then
- uc = code
- end
- texsetcatcode(code,11) -- letter
- texsetlccode(code,lc,uc)
- if traditional and cc == "lu" then
- texsetsfcode(code,999)
- end
- end
- elseif is_mark[cc] then
- texsetlccode(code,code,code) -- for hyphenation
- end
- end
- if traditional then
- sfstate = "traditional"
- end
-end
-
--- If this is something that is not documentwide and used a lot, then we
--- need a more clever approach (trivial but not now).
-
-local function setuppersfcodes(v,n)
- if sfstate ~= "unset" then
- report_defining("setting uppercase sf codes to %s",n)
- for code, chr in next, data do
- if chr.category == "lu" then
- texsetsfcode(code,n)
- end
- end
- end
- sfstate = v
-end
-
-directives.register("characters.spaceafteruppercase",function(v)
- if v == "traditional" then
- setuppersfcodes(v,999)
- elseif v == "normal" then
- setuppersfcodes(v,1000)
- end
-end)
-
---[[ldx--
<p>Next comes a whole series of helper methods. These are (will be) part
of the official <l n='api'/>.</p>
--ldx]]--
---[[ldx--
-<p>A couple of convenience methods. Beware, these are slower than directly
-accessing the data table.</p>
---ldx]]--
-
-- we could make them virtual: characters.contextnames[n]
function characters.contextname(n) return data[n].contextname or "" end
@@ -729,32 +525,6 @@ function characters.category(n,verbose)
end
end
--- xml support (moved)
-
-function characters.remapentity(chr,slot)
- contextsprint(format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr))
-end
-
-characters.activeoffset = 0x10000 -- there will be remapped in that byte range
-
--- xml.entities = xml.entities or { }
---
--- storage.register("xml/entities",xml.entities,"xml.entities") -- this will move to lxml
---
--- function characters.setmkiventities()
--- local entities = xml.entities
--- entities.lt = "<"
--- entities.amp = "&"
--- entities.gt = ">"
--- end
---
--- function characters.setmkiientities()
--- local entities = xml.entities
--- entities.lt = utfchar(characters.activeoffset + utfbyte("<"))
--- entities.amp = utfchar(characters.activeoffset + utfbyte("&"))
--- entities.gt = utfchar(characters.activeoffset + utfbyte(">"))
--- end
-
-- -- some day we will make a table .. not that many calls to utfchar
--
-- local utfchar = utf.char
@@ -947,6 +717,7 @@ function characters.lettered(str,spacing)
end
return concat(new)
end
+
--[[ldx--
<p>Requesting lower and uppercase codes:</p>
--ldx]]--
@@ -963,15 +734,6 @@ function characters.safechar(n)
end
end
-function commands.safechar(n)
- local c = data[n]
- if c and c.contextname then
- contextsprint("\\" .. c.contextname) -- context[c.contextname]()
- else
- contextsprint(utfchar(n))
- end
-end
-
function characters.shape(n)
local shcode = shcodes[n]
if not shcode then
@@ -1087,11 +849,248 @@ if not characters.superscripts then
-- print(table.serialize(superscripts, "superscripts", { hexify = true }))
-- print(table.serialize(subscripts, "subscripts", { hexify = true }))
- storage.register("characters/superscripts", superscripts, "characters.superscripts")
- storage.register("characters/subscripts", subscripts, "characters.subscripts")
+ if storage then
+ storage.register("characters/superscripts", superscripts, "characters.superscripts")
+ storage.register("characters/subscripts", subscripts, "characters.subscripts")
+ end
end
--- interface
+-- the following code will move to char-tex.lua
+
+-- tex
+
+if not tex or not context or not commands then return characters end
+
+local tex = tex
+local texsetlccode = tex.setlccode
+local texsetuccode = tex.setuccode
+local texsetsfcode = tex.setsfcode
+local texsetcatcode = tex.setcatcode
+
+local contextsprint = context.sprint
+local ctxcatcodes = catcodes.numbers.ctxcatcodes
+
+--[[ldx--
+<p>Instead of using a <l n='tex'/> file to define the named glyphs, we
+use the table. After all, we have this information available anyway.</p>
+--ldx]]--
+
+function commands.makeactive(n,name) --
+ contextsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name))
+ -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
+end
+
+function commands.utfchar(c,n)
+ if n then
+ -- contextsprint(c,charfromnumber(n))
+ contextsprint(c,utfchar(n))
+ else
+ -- contextsprint(charfromnumber(c))
+ contextsprint(utfchar(c))
+ end
+end
+
+function commands.safechar(n)
+ local c = data[n]
+ if c and c.contextname then
+ contextsprint("\\" .. c.contextname) -- context[c.contextname]()
+ else
+ contextsprint(utfchar(n))
+ end
+end
+
+tex.uprint = commands.utfchar
+
+local forbidden = tohash { -- at least now
+ 0x00A0,
+ 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D,
+ 0x202F,
+ 0x205F,
+ -- 0xFEFF,
+}
+
+function characters.define(tobelettered, tobeactivated) -- catcodetables
+
+ if trace_defining then
+ report_defining("defining active character commands")
+ end
+
+ local activated, a = { }, 0
+
+ for u, chr in next, data do -- these will be commands
+ local fallback = chr.fallback
+ if fallback then
+ contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}")
+ a = a + 1
+ activated[a] = u
+ else
+ local contextname = chr.contextname
+ if contextname then
+ local category = chr.category
+ if is_character[category] then
+ if chr.unicodeslot < 128 then
+ if is_letter[category] then
+ contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s
+ else
+ contextsprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) -- has no s
+ end
+ else
+ contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s
+ end
+ elseif is_command[category] and not forbidden[u] then
+ contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}")
+ a = a + 1
+ activated[a] = u
+ end
+ end
+ end
+ end
+
+ if tobelettered then -- shared
+ local saved = tex.catcodetable
+ for i=1,#tobelettered do
+ tex.catcodetable = tobelettered[i]
+ if trace_defining then
+ report_defining("defining letters (global, shared)")
+ end
+ for u, chr in next, data do
+ if not chr.fallback and is_letter[chr.category] and u >= 128 and u <= 65536 then
+ texsetcatcode(u,11)
+ end
+ local range = chr.range
+ if range then
+ for i=1,range.first,range.last do
+ texsetcatcode(i,11)
+ end
+ end
+ end
+ texsetcatcode(0x200C,11) -- non-joiner
+ texsetcatcode(0x200D,11) -- joiner
+ end
+ tex.catcodetable = saved
+ end
+
+ local nofactivated = #tobeactivated
+ if tobeactivated and nofactivated > 0 then
+ for i=1,nofactivated do
+ local u = activated[i]
+ if u then
+ report_defining("character 0x%05X is active in sets %s (%s)",u,concat(tobeactivated,","),data[u].description)
+ end
+ end
+ local saved = tex.catcodetable
+ for i=1,#tobeactivated do
+ local vector = tobeactivated[i]
+ if trace_defining then
+ report_defining("defining %s active characters in vector %s",nofactivated,vector)
+ end
+ tex.catcodetable = vector
+ for i=1,nofactivated do
+ local u = activated[i]
+ if u then
+ texsetcatcode(u,13)
+ end
+ end
+ end
+ tex.catcodetable = saved
+ end
+
+end
+
+--[[ldx--
+<p>Setting the lccodes is also done in a loop over the data table.</p>
+--ldx]]--
+
+local sfmode = "unset" -- unset, traditional, normal
+
+function characters.setcodes()
+ if trace_defining then
+ report_defining("defining lc and uc codes")
+ end
+ local traditional = sfstate == "traditional" or sfstate == "unset"
+ for code, chr in next, data do
+ local cc = chr.category
+ if is_letter[cc] then
+ local range = chr.range
+ if range then
+ for i=range.first,range.last do
+ texsetcatcode(i,11) -- letter
+ texsetlccode(i,i,i) -- self self
+ end
+ else
+ local lc, uc = chr.lccode, chr.uccode
+ if not lc then
+ chr.lccode, lc = code, code
+ elseif type(lc) == "table" then
+ lc = code
+ end
+ if not uc then
+ chr.uccode, uc = code, code
+ elseif type(uc) == "table" then
+ uc = code
+ end
+ texsetcatcode(code,11) -- letter
+ texsetlccode(code,lc,uc)
+ if traditional and cc == "lu" then
+ texsetsfcode(code,999)
+ end
+ end
+ elseif is_mark[cc] then
+ texsetlccode(code,code,code) -- for hyphenation
+ end
+ end
+ if traditional then
+ sfstate = "traditional"
+ end
+end
+
+-- If this is something that is not documentwide and used a lot, then we
+-- need a more clever approach (trivial but not now).
+
+local function setuppersfcodes(v,n)
+ if sfstate ~= "unset" then
+ report_defining("setting uppercase sf codes to %s",n)
+ for code, chr in next, data do
+ if chr.category == "lu" then
+ texsetsfcode(code,n)
+ end
+ end
+ end
+ sfstate = v
+end
+
+directives.register("characters.spaceafteruppercase",function(v)
+ if v == "traditional" then
+ setuppersfcodes(v,999)
+ elseif v == "normal" then
+ setuppersfcodes(v,1000)
+ end
+end)
+
+-- xml
+
+characters.activeoffset = 0x10000 -- there will be remapped in that byte range
+
+function commands.remapentity(chr,slot)
+ contextsprint(format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr))
+end
+
+-- xml.entities = xml.entities or { }
+--
+-- storage.register("xml/entities",xml.entities,"xml.entities") -- this will move to lxml
+--
+-- function characters.setmkiventities()
+-- local entities = xml.entities
+-- entities.lt = "<"
+-- entities.amp = "&"
+-- entities.gt = ">"
+-- end
+--
+-- function characters.setmkiientities()
+-- local entities = xml.entities
+-- entities.lt = utfchar(characters.activeoffset + utfbyte("<"))
+-- entities.amp = utfchar(characters.activeoffset + utfbyte("&"))
+-- entities.gt = utfchar(characters.activeoffset + utfbyte(">"))
+-- end
-commands.utfchar = tex.uprint