diff options
Diffstat (limited to 'tex/context/base/char-ini.lua')
-rw-r--r-- | tex/context/base/char-ini.lua | 338 |
1 files changed, 244 insertions, 94 deletions
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua index a24de6e23..4893875c3 100644 --- a/tex/context/base/char-ini.lua +++ b/tex/context/base/char-ini.lua @@ -14,10 +14,17 @@ local concat = table.concat local next, tonumber = next, tonumber local texsprint, texprint = tex.sprint, tex.print local format, lower, gsub, match, gmatch = string.format, string.lower, string.gsub, string.match, string.match, string.gmatch +local texsetlccode, texsetuccode, texsetsfcode, texsetcatcode = tex.setlccode, tex.setuccode, tex.setsfcode, tex.setcatcode + +local allocate, mark = utilities.storage.allocate, utilities.storage.mark local ctxcatcodes = tex.ctxcatcodes local texcatcodes = tex.texcatcodes +local trace_defining = false trackers.register("characters.defining", function(v) characters_defining = v end) + +local report_defining = logs.new("characters") + --[[ldx-- <p>This module implements some methods and creates additional datastructured from the big character table that we use for all kind of purposes: @@ -27,15 +34,23 @@ from the big character table that we use for all kind of purposes: loaded!</p> --ldx]]-- -characters = characters or { } +characters = characters or { } local characters = characters -characters.data = characters.data or { } + local data = characters.data +if data then + mark(data) -- why does this fail +else + report_defining("fatal error: 'char-def.lua' is not loaded") + os.exit() +end + if not characters.ranges then - characters.ranges = { } + local ranges = allocate { } + characters.ranges = ranges for k, v in next, data do - characters.ranges[#characters.ranges+1] = k + ranges[#ranges+1] = k end end @@ -43,20 +58,18 @@ storage.register("characters/ranges",characters.ranges,"characters.ranges") local ranges = characters.ranges -setmetatable(data, { - __index = function(t,k) - for r=1,#ranges do - local rr = ranges[r] -- first in range - if k > rr and k <= data[rr].range then - t[k] = t[rr] - return t[k] - end +setmetatablekey(data, "__index", function(t,k) + for r=1,#ranges do + local rr = ranges[r] -- first in range + if k > rr and k <= data[rr].range then + t[k] = t[rr] + return t[k] end - return nil end -}) + return nil +end ) -characters.blocks = { +characters.blocks = allocate { ["aegeannumbers"] = { 0x10100, 0x1013F, "Aegean Numbers" }, ["alphabeticpresentationforms"] = { 0x0FB00, 0x0FB4F, "Alphabetic Presentation Forms" }, ["ancientgreekmusicalnotation"] = { 0x1D200, 0x1D24F, "Ancient Greek Musical Notation" }, @@ -249,7 +262,7 @@ function characters.getrange(name) return slot, slot, nil end -characters.categories = { +characters.categories = allocate { lu = "Letter Uppercase", ll = "Letter Lowercase", lt = "Letter Titlecase", @@ -285,22 +298,26 @@ characters.categories = { --~ special : cf (softhyphen) zs (emspace) --~ characters: ll lm lo lt lu mn nl no pc pd pe pf pi po ps sc sk sm so -characters.is_character = table.tohash { +local is_character = allocate ( table.tohash { "lu","ll","lt","lm","lo", "nd","nl","no", "mn", "nl","no", "pc","pd","ps","pe","pi","pf","po", "sm","sc","sk","so" -} +} ) -characters.is_letter = table.tohash { +local is_letter = allocate ( table.tohash { "ll","lm","lo","lt","lu" -} +} ) -characters.is_command = table.tohash { +local is_command = allocate ( table.tohash { "cf","zs" -} +} ) + +characters.is_character = is_character +characters.is_letter = is_letter +characters.is_command = is_command -- linebreak: todo: hash -- @@ -311,7 +328,7 @@ characters.is_command = table.tohash { -- -- N A H W F Na -characters.bidi = { +characters.bidi = allocate { l = "Left-to-Right", lre = "Left-to-Right Embedding", lro = "Left-to-Right Override", @@ -360,8 +377,8 @@ if not characters.fallbacks then end -storage.register("characters.fallbacks", characters.fallbacks, "characters.fallbacks") -storage.register("characters.directions", characters.directions, "characters.directions") +storage.register("characters/fallbacks", characters.fallbacks, "characters.fallbacks") +storage.register("characters/directions", characters.directions, "characters.directions") --[[ldx-- <p>The <type>context</type> namespace is used to store methods and data @@ -381,74 +398,155 @@ function tex.uprint(n) texsprint(ctxcatcodes,utfchar(n)) end -local template_a = "\\startextendcatcodetable{%s}\\chardef\\l=11\\chardef\\a=13\\let\\c\\catcode%s\\let\\a\\undefined\\let\\l\\undefined\\let\\c\\undefined\\stopextendcatcodetable" -local template_b = "\\chardef\\l=11\\chardef\\a=13\\let\\c\\catcode%s\\let\\a\\undefined\\let\\l\\undefined\\let\\c\\undefined" - --- we need a function for setting the codes .... - -function characters.define(tobelettered, tobeactivated) -- catcodetables - local is_character, is_command, is_letter = characters.is_character, characters.is_command, characters.is_letter - local lettered, activated = { }, { } - for u, chr in next, data do - -- we can use a macro instead of direct settings - local fallback = chr.fallback - if fallback then - -- texprint(format("{\\catcode %s=13\\unexpanded\\gdef %s{\\checkedchar{%s}{%s}}}",u,utfchar(u),u,fallback)) - texsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}") -- no texprint - activated[#activated+1] = "\\c"..u.."\\a" - else - local contextname = chr.contextname - local category = chr.category - if contextname then - if is_character[category] then - -- by this time, we're still in normal catcode mode - -- subtle: not "\\",contextname but "\\"..contextname - if chr.unicodeslot < 128 then - -- texprint(ctxcatcodes, "\\chardef\\"..contextname,"=",u) - texprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) - else - -- texprint(ctxcatcodes, "\\let\\"..contextname,"=",utfchar(u)) - texprint(ctxcatcodes,format("\\let\\%s=%s",contextname,utfchar(u))) - if is_letter[category] then - lettered[#lettered+1] = "\\c"..u.."\\l" +if texsetcatcode then + + -- todo -- define per table and then also register name (for tracing) + + function characters.define(tobelettered, tobeactivated) -- catcodetables + + if trace_defining then + report_defining("defining active character commands") + end + + local activated = { } + + for u, chr in next, data do -- these will be commands + local fallback = chr.fallback + if fallback then + texsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}") -- no texprint + activated[#activated+1] = u + else + local contextname = chr.contextname + if contextname then + local category = chr.category + if is_character[category] then + if chr.unicodeslot < 128 then + texprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) + else + texprint(ctxcatcodes,format("\\let\\%s=%s",contextname,utfchar(u))) end + elseif is_command[category] then + texsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}") -- no texprint + activated[#activated+1] = u end - elseif is_command[category] then - -- this might change: contextcommand ipv contextname - -- texprint(format("{\\catcode %s=13\\unexpanded\\gdef %s{\\%s}}",u,utfchar(u),contextname)) - texsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}") -- no texprint - activated[#activated+1] = "\\c"..u.."\\a" - end - elseif is_letter[category] then - if u >= 128 and u <= 65536 then -- catch private mess - lettered[#lettered+1] = "\\c"..u.."\\l" end end end - if chr.range then - lettered[#lettered+1] = format('\\dofastrecurse{"%05X}{"%05X}{1}{\\c\\fastrecursecounter\\l}',u,chr.range) + + if tobelettered then -- shared + -- local saved = tex.catcodetable + -- for i=1,#tobelettered do + -- tex.catcodetable = tobelettered[i] + if trace_defining then + report_defining("defining letters (global, shared)") + end + for u, chr in next, data do + if not chr.fallback and is_letter[chr.category] and u >= 128 and u <= 65536 then + texsetcatcode(u,11) + end + if chr.range then + for i=1,u,chr.range do + texsetcatcode(i,11) + end + end + end + texsetcatcode(0x200C,11) -- non-joiner + texsetcatcode(0x200D,11) -- joiner + -- end + -- tex.catcodetable = saved end - end - -- if false then - lettered[#lettered+1] = "\\c"..0x200C.."\\l" -- non-joiner - lettered[#lettered+1] = "\\c"..0x200D.."\\l" -- joiner - -- fi - if tobelettered then - lettered = concat(lettered) - if true then - texsprint(ctxcatcodes,format(template_b,lettered)) - else - for l=1,#tobelettered do - texsprint(ctxcatcodes,format(template_a,tobelettered[l],lettered)) + + local nofactivated = #tobeactivated + if tobeactivated and nofactivated > 0 then + for i=1,nofactivated do + local u = activated[i] + report_defining("character 0x%05X is active in sets %s (%s)",u,concat(tobeactivated,","),data[u].description) + end + local saved = tex.catcodetable + for i=1,#tobeactivated do + local vector = tobeactivated[i] + if trace_defining then + report_defining("defining %s active characters in vector %s",nofactivated,vector) + end + tex.catcodetable = vector + for i=1,nofactivated do + texsetcatcode(activated[i],13) + end end + tex.catcodetable = saved end + end - if tobeactivated then - activated = concat(activated) - for a=1,#tobeactivated do - texsprint(ctxcatcodes,format(template_a,tobeactivated[a],activated)) + +else -- keep this + + local template_a = "\\startextendcatcodetable{%s}\\chardef\\l=11\\chardef\\a=13\\let\\c\\catcode%s\\let\\a\\undefined\\let\\l\\undefined\\let\\c\\undefined\\stopextendcatcodetable" + local template_b = "\\chardef\\l=11\\chardef\\a=13\\let\\c\\catcode%s\\let\\a\\undefined\\let\\l\\undefined\\let\\c\\undefined" + + function characters.define(tobelettered, tobeactivated) -- catcodetables + local lettered, activated = { }, { } + for u, chr in next, data do + -- we can use a macro instead of direct settings + local fallback = chr.fallback + if fallback then + -- texprint(format("{\\catcode %s=13\\unexpanded\\gdef %s{\\checkedchar{%s}{%s}}}",u,utfchar(u),u,fallback)) + texsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}") -- no texprint + activated[#activated+1] = "\\c"..u.."\\a" + else + local contextname = chr.contextname + local category = chr.category + if contextname then + if is_character[category] then + -- by this time, we're still in normal catcode mode + -- subtle: not "\\",contextname but "\\"..contextname + if chr.unicodeslot < 128 then + -- texprint(ctxcatcodes, "\\chardef\\"..contextname,"=",u) + texprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) + else + -- texprint(ctxcatcodes, "\\let\\"..contextname,"=",utfchar(u)) + texprint(ctxcatcodes,format("\\let\\%s=%s",contextname,utfchar(u))) + if is_letter[category] then + lettered[#lettered+1] = "\\c"..u.."\\l" + end + end + elseif is_command[category] then + -- this might change: contextcommand ipv contextname + -- texprint(format("{\\catcode %s=13\\unexpanded\\gdef %s{\\%s}}",u,utfchar(u),contextname)) + texsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}") -- no texprint + activated[#activated+1] = "\\c"..u.."\\a" + end + elseif is_letter[category] then + if u >= 128 and u <= 65536 then -- catch private mess + lettered[#lettered+1] = "\\c"..u.."\\l" + end + end + end + if chr.range then + lettered[#lettered+1] = format('\\dofastrecurse{"%05X}{"%05X}{1}{\\c\\fastrecursecounter\\l}',u,chr.range) + end + end + -- if false then + lettered[#lettered+1] = "\\c"..0x200C.."\\l" -- non-joiner + lettered[#lettered+1] = "\\c"..0x200D.."\\l" -- joiner + -- fi + if tobelettered then + lettered = concat(lettered) + if true then + texsprint(ctxcatcodes,format(template_b,lettered)) -- global + else + for l=1,#tobelettered do + texsprint(ctxcatcodes,format(template_a,tobelettered[l],lettered)) + end + end + end + if tobeactivated then + activated = concat(activated) + for a=1,#tobeactivated do + texsprint(ctxcatcodes,format(template_a,tobeactivated[a],activated)) + end end end + end function characters.charcode(box) @@ -461,24 +559,64 @@ end <p>Setting the lccodes is also done in a loop over the data table.</p> --ldx]]-- +--~ function tex.setsfcode (index,sf) ... end +--~ function tex.setlccode (index,lc,[uc]) ... end -- optional third value, safes call +--~ function tex.setuccode (index,uc,[lc]) ... end +--~ function tex.setcatcode(index,cc) ... end + -- we need a function ... -function characters.setcodes() - for code, chr in next, data do - local cc = chr.category - if cc == 'll' or cc == 'lu' or cc == 'lt' then - local lc, uc = chr.lccode, chr.uccode - if not lc then chr.lccode, lc = code, code end - if not uc then chr.uccode, uc = code, code end - texsprint(ctxcatcodes,format("\\setcclcuc{%i}{%i}{%i}",code,lc,uc)) +--~ tex.lccode +--~ tex.uccode +--~ tex.sfcode +--~ tex.catcode + +if texsetcatcode then + + function characters.setcodes() + if trace_defining then + report_defining("defining lc and uc codes") end - if cc == "lu" then - texprint(ctxcatcodes,"\\sfcode ",code,"999 ") + for code, chr in next, data do + local cc = chr.category + if cc == 'll' or cc == 'lu' or cc == 'lt' then + local lc, uc = chr.lccode, chr.uccode + if not lc then chr.lccode, lc = code, code end + if not uc then chr.uccode, uc = code, code end + texsetcatcode(code,11) -- letter + texsetlccode(code,lc,uc) + if cc == "lu" then + texsetsfcode(code,999) + end + elseif cc == "lo" and chr.range then + for i=code,chr.range do + texsetcatcode(code,11) -- letter + texsetlccode(code,code,code) -- self self + end + end end - if cc == "lo" and chr.range then - texsprint(ctxcatcodes,format('\\dofastrecurse{"%05X}{"%05X}{1}{\\setcclcucself\\fastrecursecounter}',code,chr.range)) + end + +else -- keep this one + + function characters.setcodes() + for code, chr in next, data do + local cc = chr.category + if cc == 'll' or cc == 'lu' or cc == 'lt' then + local lc, uc = chr.lccode, chr.uccode + if not lc then chr.lccode, lc = code, code end + if not uc then chr.uccode, uc = code, code end + texsprint(ctxcatcodes,format("\\setcclcuc{%i}{%i}{%i}",code,lc,uc)) + end + if cc == "lu" then + texprint(ctxcatcodes,"\\sfcode ",code,"999 ") + end + if cc == "lo" and chr.range then + texsprint(ctxcatcodes,format('\\dofastrecurse{"%05X}{"%05X}{1}{\\setcclcucself\\fastrecursecounter}',code,chr.range)) + end end end + end --[[ldx-- @@ -649,6 +787,18 @@ function characters.upper(str) return concat(new) end +function characters.lettered(str) + local new = { } + for u in utfvalues(str) do + local d = data[u] + if is_letter[d.category] then + new[#new+1] = utfchar(d.lccode or u) + end + end + return concat(new) +end + + -- -- some day we might go this route, but it does not really save that much -- -- so not now (we can generate a lot using mtx-unicode that operates on the -- -- database) |