diff options
Diffstat (limited to 'tex/context/base/char-ini.lua')
-rw-r--r-- | tex/context/base/char-ini.lua | 493 |
1 files changed, 246 insertions, 247 deletions
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua index 277967ef3..9f8a0ce11 100644 --- a/tex/context/base/char-ini.lua +++ b/tex/context/base/char-ini.lua @@ -10,8 +10,6 @@ if not modules then modules = { } end modules ['char-ini'] = { -- we can remove the tag range starting at 0xE0000 (special applications) -local tex = tex - local utfchar, utfbyte, utfvalues, ustring = utf.char, utf.byte, utf.values, utf.ustring local concat, unpack, tohash = table.concat, table.unpack, table.tohash local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset @@ -23,14 +21,6 @@ local utf8char = patterns.utf8char local allocate = utilities.storage.allocate local mark = utilities.storage.mark -local texsetlccode = tex.setlccode -local texsetuccode = tex.setuccode -local texsetsfcode = tex.setsfcode -local texsetcatcode = tex.setcatcode - -local contextsprint = context.sprint -local ctxcatcodes = catcodes.numbers.ctxcatcodes -local texcatcodes = catcodes.numbers.texcatcodes local setmetatableindex = table.setmetatableindex @@ -318,7 +308,7 @@ end) local otfscripts = utilities.storage.allocate() characters.otfscripts = otfscripts -table.setmetatableindex(otfscripts,function(t,unicode) +setmetatableindex(otfscripts,function(t,unicode) for k, v in next, blocks do local first, last = v.first, v.last if unicode >= first and unicode <= last then @@ -493,7 +483,9 @@ if not characters.fallbacks then end -storage.register("characters/fallbacks", characters.fallbacks, "characters.fallbacks") -- accents and such +if storage then + storage.register("characters/fallbacks", characters.fallbacks, "characters.fallbacks") -- accents and such +end characters.directions = { } @@ -511,206 +503,10 @@ setmetatableindex(characters.directions,function(t,k) end) --[[ldx-- -<p>The <type>context</type> namespace is used to store methods and data -which is rather specific to <l n='context'/>.</p> ---ldx]]-- - ---[[ldx-- -<p>Instead of using a <l n='tex'/> file to define the named glyphs, we -use the table. After all, we have this information available anyway.</p> ---ldx]]-- - -function characters.makeactive(n,name) -- - contextsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)) - -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name) -end - -function tex.uprint(c,n) - if n then - -- contextsprint(c,charfromnumber(n)) - contextsprint(c,utfchar(n)) - else - -- contextsprint(charfromnumber(c)) - contextsprint(utfchar(c)) - end -end - -local forbidden = tohash { -- at least now - 0x00A0, - 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D, - 0x202F, - 0x205F, - -- 0xFEFF, -} - -function characters.define(tobelettered, tobeactivated) -- catcodetables - - if trace_defining then - report_defining("defining active character commands") - end - - local activated, a = { }, 0 - - for u, chr in next, data do -- these will be commands - local fallback = chr.fallback - if fallback then - contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}") - a = a + 1 - activated[a] = u - else - local contextname = chr.contextname - if contextname then - local category = chr.category - if is_character[category] then - if chr.unicodeslot < 128 then - if is_letter[category] then - contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s - else - contextsprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) -- has no s - end - else - contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s - end - elseif is_command[category] and not forbidden[u] then - contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}") - a = a + 1 - activated[a] = u - end - end - end - end - - if tobelettered then -- shared - local saved = tex.catcodetable - for i=1,#tobelettered do - tex.catcodetable = tobelettered[i] - if trace_defining then - report_defining("defining letters (global, shared)") - end - for u, chr in next, data do - if not chr.fallback and is_letter[chr.category] and u >= 128 and u <= 65536 then - texsetcatcode(u,11) - end - local range = chr.range - if range then - for i=1,range.first,range.last do - texsetcatcode(i,11) - end - end - end - texsetcatcode(0x200C,11) -- non-joiner - texsetcatcode(0x200D,11) -- joiner - end - tex.catcodetable = saved - end - - local nofactivated = #tobeactivated - if tobeactivated and nofactivated > 0 then - for i=1,nofactivated do - local u = activated[i] - if u then - report_defining("character 0x%05X is active in sets %s (%s)",u,concat(tobeactivated,","),data[u].description) - end - end - local saved = tex.catcodetable - for i=1,#tobeactivated do - local vector = tobeactivated[i] - if trace_defining then - report_defining("defining %s active characters in vector %s",nofactivated,vector) - end - tex.catcodetable = vector - for i=1,nofactivated do - local u = activated[i] - if u then - texsetcatcode(u,13) - end - end - end - tex.catcodetable = saved - end - -end - ---[[ldx-- -<p>Setting the lccodes is also done in a loop over the data table.</p> ---ldx]]-- - -local sfmode = "unset" -- unset, traditional, normal - -function characters.setcodes() - if trace_defining then - report_defining("defining lc and uc codes") - end - local traditional = sfstate == "traditional" or sfstate == "unset" - for code, chr in next, data do - local cc = chr.category - if is_letter[cc] then - local range = chr.range - if range then - for i=range.first,range.last do - texsetcatcode(i,11) -- letter - texsetlccode(i,i,i) -- self self - end - else - local lc, uc = chr.lccode, chr.uccode - if not lc then - chr.lccode, lc = code, code - elseif type(lc) == "table" then - lc = code - end - if not uc then - chr.uccode, uc = code, code - elseif type(uc) == "table" then - uc = code - end - texsetcatcode(code,11) -- letter - texsetlccode(code,lc,uc) - if traditional and cc == "lu" then - texsetsfcode(code,999) - end - end - elseif is_mark[cc] then - texsetlccode(code,code,code) -- for hyphenation - end - end - if traditional then - sfstate = "traditional" - end -end - --- If this is something that is not documentwide and used a lot, then we --- need a more clever approach (trivial but not now). - -local function setuppersfcodes(v,n) - if sfstate ~= "unset" then - report_defining("setting uppercase sf codes to %s",n) - for code, chr in next, data do - if chr.category == "lu" then - texsetsfcode(code,n) - end - end - end - sfstate = v -end - -directives.register("characters.spaceafteruppercase",function(v) - if v == "traditional" then - setuppersfcodes(v,999) - elseif v == "normal" then - setuppersfcodes(v,1000) - end -end) - ---[[ldx-- <p>Next comes a whole series of helper methods. These are (will be) part of the official <l n='api'/>.</p> --ldx]]-- ---[[ldx-- -<p>A couple of convenience methods. Beware, these are slower than directly -accessing the data table.</p> ---ldx]]-- - -- we could make them virtual: characters.contextnames[n] function characters.contextname(n) return data[n].contextname or "" end @@ -729,32 +525,6 @@ function characters.category(n,verbose) end end --- xml support (moved) - -function characters.remapentity(chr,slot) - contextsprint(format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr)) -end - -characters.activeoffset = 0x10000 -- there will be remapped in that byte range - --- xml.entities = xml.entities or { } --- --- storage.register("xml/entities",xml.entities,"xml.entities") -- this will move to lxml --- --- function characters.setmkiventities() --- local entities = xml.entities --- entities.lt = "<" --- entities.amp = "&" --- entities.gt = ">" --- end --- --- function characters.setmkiientities() --- local entities = xml.entities --- entities.lt = utfchar(characters.activeoffset + utfbyte("<")) --- entities.amp = utfchar(characters.activeoffset + utfbyte("&")) --- entities.gt = utfchar(characters.activeoffset + utfbyte(">")) --- end - -- -- some day we will make a table .. not that many calls to utfchar -- -- local utfchar = utf.char @@ -947,6 +717,7 @@ function characters.lettered(str,spacing) end return concat(new) end + --[[ldx-- <p>Requesting lower and uppercase codes:</p> --ldx]]-- @@ -963,15 +734,6 @@ function characters.safechar(n) end end -function commands.safechar(n) - local c = data[n] - if c and c.contextname then - contextsprint("\\" .. c.contextname) -- context[c.contextname]() - else - contextsprint(utfchar(n)) - end -end - function characters.shape(n) local shcode = shcodes[n] if not shcode then @@ -1087,11 +849,248 @@ if not characters.superscripts then -- print(table.serialize(superscripts, "superscripts", { hexify = true })) -- print(table.serialize(subscripts, "subscripts", { hexify = true })) - storage.register("characters/superscripts", superscripts, "characters.superscripts") - storage.register("characters/subscripts", subscripts, "characters.subscripts") + if storage then + storage.register("characters/superscripts", superscripts, "characters.superscripts") + storage.register("characters/subscripts", subscripts, "characters.subscripts") + end end --- interface +-- the following code will move to char-tex.lua + +-- tex + +if not tex or not context or not commands then return characters end + +local tex = tex +local texsetlccode = tex.setlccode +local texsetuccode = tex.setuccode +local texsetsfcode = tex.setsfcode +local texsetcatcode = tex.setcatcode + +local contextsprint = context.sprint +local ctxcatcodes = catcodes.numbers.ctxcatcodes + +--[[ldx-- +<p>Instead of using a <l n='tex'/> file to define the named glyphs, we +use the table. After all, we have this information available anyway.</p> +--ldx]]-- + +function commands.makeactive(n,name) -- + contextsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)) + -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name) +end + +function commands.utfchar(c,n) + if n then + -- contextsprint(c,charfromnumber(n)) + contextsprint(c,utfchar(n)) + else + -- contextsprint(charfromnumber(c)) + contextsprint(utfchar(c)) + end +end + +function commands.safechar(n) + local c = data[n] + if c and c.contextname then + contextsprint("\\" .. c.contextname) -- context[c.contextname]() + else + contextsprint(utfchar(n)) + end +end + +tex.uprint = commands.utfchar + +local forbidden = tohash { -- at least now + 0x00A0, + 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D, + 0x202F, + 0x205F, + -- 0xFEFF, +} + +function characters.define(tobelettered, tobeactivated) -- catcodetables + + if trace_defining then + report_defining("defining active character commands") + end + + local activated, a = { }, 0 + + for u, chr in next, data do -- these will be commands + local fallback = chr.fallback + if fallback then + contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}") + a = a + 1 + activated[a] = u + else + local contextname = chr.contextname + if contextname then + local category = chr.category + if is_character[category] then + if chr.unicodeslot < 128 then + if is_letter[category] then + contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s + else + contextsprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) -- has no s + end + else + contextsprint(ctxcatcodes,format("\\def\\%s{%s}",contextname,utfchar(u))) -- has no s + end + elseif is_command[category] and not forbidden[u] then + contextsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}") + a = a + 1 + activated[a] = u + end + end + end + end + + if tobelettered then -- shared + local saved = tex.catcodetable + for i=1,#tobelettered do + tex.catcodetable = tobelettered[i] + if trace_defining then + report_defining("defining letters (global, shared)") + end + for u, chr in next, data do + if not chr.fallback and is_letter[chr.category] and u >= 128 and u <= 65536 then + texsetcatcode(u,11) + end + local range = chr.range + if range then + for i=1,range.first,range.last do + texsetcatcode(i,11) + end + end + end + texsetcatcode(0x200C,11) -- non-joiner + texsetcatcode(0x200D,11) -- joiner + end + tex.catcodetable = saved + end + + local nofactivated = #tobeactivated + if tobeactivated and nofactivated > 0 then + for i=1,nofactivated do + local u = activated[i] + if u then + report_defining("character 0x%05X is active in sets %s (%s)",u,concat(tobeactivated,","),data[u].description) + end + end + local saved = tex.catcodetable + for i=1,#tobeactivated do + local vector = tobeactivated[i] + if trace_defining then + report_defining("defining %s active characters in vector %s",nofactivated,vector) + end + tex.catcodetable = vector + for i=1,nofactivated do + local u = activated[i] + if u then + texsetcatcode(u,13) + end + end + end + tex.catcodetable = saved + end + +end + +--[[ldx-- +<p>Setting the lccodes is also done in a loop over the data table.</p> +--ldx]]-- + +local sfmode = "unset" -- unset, traditional, normal + +function characters.setcodes() + if trace_defining then + report_defining("defining lc and uc codes") + end + local traditional = sfstate == "traditional" or sfstate == "unset" + for code, chr in next, data do + local cc = chr.category + if is_letter[cc] then + local range = chr.range + if range then + for i=range.first,range.last do + texsetcatcode(i,11) -- letter + texsetlccode(i,i,i) -- self self + end + else + local lc, uc = chr.lccode, chr.uccode + if not lc then + chr.lccode, lc = code, code + elseif type(lc) == "table" then + lc = code + end + if not uc then + chr.uccode, uc = code, code + elseif type(uc) == "table" then + uc = code + end + texsetcatcode(code,11) -- letter + texsetlccode(code,lc,uc) + if traditional and cc == "lu" then + texsetsfcode(code,999) + end + end + elseif is_mark[cc] then + texsetlccode(code,code,code) -- for hyphenation + end + end + if traditional then + sfstate = "traditional" + end +end + +-- If this is something that is not documentwide and used a lot, then we +-- need a more clever approach (trivial but not now). + +local function setuppersfcodes(v,n) + if sfstate ~= "unset" then + report_defining("setting uppercase sf codes to %s",n) + for code, chr in next, data do + if chr.category == "lu" then + texsetsfcode(code,n) + end + end + end + sfstate = v +end + +directives.register("characters.spaceafteruppercase",function(v) + if v == "traditional" then + setuppersfcodes(v,999) + elseif v == "normal" then + setuppersfcodes(v,1000) + end +end) + +-- xml + +characters.activeoffset = 0x10000 -- there will be remapped in that byte range + +function commands.remapentity(chr,slot) + contextsprint(format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr)) +end + +-- xml.entities = xml.entities or { } +-- +-- storage.register("xml/entities",xml.entities,"xml.entities") -- this will move to lxml +-- +-- function characters.setmkiventities() +-- local entities = xml.entities +-- entities.lt = "<" +-- entities.amp = "&" +-- entities.gt = ">" +-- end +-- +-- function characters.setmkiientities() +-- local entities = xml.entities +-- entities.lt = utfchar(characters.activeoffset + utfbyte("<")) +-- entities.amp = utfchar(characters.activeoffset + utfbyte("&")) +-- entities.gt = utfchar(characters.activeoffset + utfbyte(">")) +-- end -commands.utfchar = tex.uprint |