-- Registers descriptive metadata for this file under the 'char-tex' key of the
-- global modules table (the table is created here when it does not exist yet).
if not modules then modules = { } end modules ['char-tex'] = {
version = 1.001,
comment = "companion to char-ini.mkiv",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
copyright = "PRAGMA ADE / ConTeXt Development Team",
license = "see context related readme files"
}
local lpeg = lpeg
local tonumber, next, type = tonumber, next, type
local format, find, gmatch, match, gsub = string.format, string.find, string.gmatch, string.match, string.gsub
local utfchar, utfbyte = utf.char, utf.byte
local concat, tohash = table.concat, table.tohash
local P, C, R, S, V, Cs, Cc = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs, lpeg.Cc
local lpegpatterns = lpeg.patterns
local lpegmatch = lpeg.match
local utfchartabletopattern = lpeg.utfchartabletopattern
local allocate = utilities.storage.allocate
local mark = utilities.storage.mark
local context = context
local commands = commands
local characters = characters
local texcharacters = { }
characters.tex = texcharacters
local utffilters = characters.filters.utf
local allocate = utilities.storage.allocate or function() return { } end
local mark = utilities.storage.mark or allocate
local is_character = characters.is_character
local is_letter = characters.is_letter
local is_command = characters.is_command
local is_spacing = characters.is_spacing
local is_mark = characters.is_mark
local is_punctuation = characters.is_punctuation
local data = characters.data if not data then return end
local blocks = characters.blocks
-- The "characters.defining" tracker toggles the local trace_defining flag that the
-- catcode related functions in this file test before they report what they do. The
-- callback has to assign to that local, otherwise enabling the tracker has no effect.
local trace_defining  = false  trackers.register("characters.defining", function(v) trace_defining = v end)
-- Reporter used for all messages of this file (category "characters").
local report_defining = logs.reporter("characters")
--[[ldx--
In order to deal with 8-bit output, we need to find a way to go from UTF to
8-bit. This is handled in the engine itself.
This leaves us problems with characters that are specific to TeX like
{}, $ and alike. We can remap some chars that TeX input files
are sensitive for to a private area (while writing to a utility file) and revert them
to their original slot when we read in such a file. Instead of reverting, we can (when
we resolve characters to glyphs) map them to their right glyph there. For this purpose
we can use the private planes 0x0F0000 and 0x100000.
--ldx]]--
-- Three lookup tables are built for the characters listed in 'special':
--
--   escapes : character                  -> backslash followed by that character
--   low     : character                  -> counterpart at 0x0F0000 + byte value
--   high    : counterpart at 0x0F0000+.. -> original character
--
-- They are published as utffilters.private.
local low     = allocate()
local high    = allocate()
local escapes = allocate()
local special = "~#$%^&_{}\\|" -- "~#$%{}\\|"
local private = {
    low     = low,
    high    = high,
    escapes = escapes,
}
utffilters.private = private
-- gmatch only delivers (one character) strings, so there is no need for a
-- separate code path that deals with numbers here.
for ch in gmatch(special,".") do
    local cb = utfbyte(ch)
    if cb < 256 then
        local relocated = utfchar(0x0F0000 + cb) -- computed once, used as value and as key
        escapes[ch] = "\\" .. ch
        low[ch]     = relocated
        if ch == "%" then
            ch = "%%" -- nasty, but we need this as in replacements (also in lpeg) % is interpreted
        end
        high[relocated] = ch
    end
end
-- Replacers built from the tables above: tohigh applies the 'low' table and tolow
-- applies the 'high' table. Both are frozen and only meant for basic tex.
local tohigh, tolow = lpeg.replacer(low), lpeg.replacer(high)

lpegpatterns.utftohigh = tohigh
lpegpatterns.utftolow  = tolow

-- Returns str after running it through the tohigh replacer.
utffilters.harden = function(str)
    return lpegmatch(tohigh,str)
end

-- Returns str after running it through the tolow replacer.
utffilters.soften = function(str)
    return lpegmatch(tolow,str)
end

-- The same three tables, this time wrapped by utf.remapper (maybe: ,"dynamic").
private.escape  = utf.remapper(escapes)
private.replace = utf.remapper(low)
private.revert  = utf.remapper(high)
--[[ldx--
We get a more efficient variant of this when we integrate
replacements in collapser. This more or less renders the previous
private code redundant. The following code is equivalent but the
first snippet uses the relocated dollars.
[x] [$x$]
--ldx]]--
-- using the tree-lpeg-mapper would be nice but we also need to deal with end-of-string
-- cases like "\"\i" and we don't want "\relax" to be seen as "\r e lax" (for which we
-- need to mess with spaces)
-- Accent lookup table. The outer key is the accent identifier (one character, as it
-- follows the backslash in the patterns built further down), the inner key is the
-- base character and the value is the accented character as a utf string. The empty
-- string key of each group holds the accent on its own. Where a group supports it,
-- the dotless i can be given as "ı" as well as the command string "\\i". The table
-- is exported as texcharacters.accentmapping.
local accentmapping = allocate {
['"'] = { [""] = "¨",
A = "Ä", a = "ä",
E = "Ë", e = "ë",
I = "Ï", i = "ï", ["ı"] = "ï", ["\\i"] = "ï",
O = "Ö", o = "ö",
U = "Ü", u = "ü",
Y = "Ÿ", y = "ÿ",
},
["'"] = { [""] = "´",
A = "Á", a = "á",
C = "Ć", c = "ć",
E = "É", e = "é",
I = "Í", i = "í", ["ı"] = "í", ["\\i"] = "í",
L = "Ĺ", l = "ĺ",
N = "Ń", n = "ń",
O = "Ó", o = "ó",
R = "Ŕ", r = "ŕ",
S = "Ś", s = "ś",
U = "Ú", u = "ú",
Y = "Ý", y = "ý",
Z = "Ź", z = "ź",
},
["."] = { [""] = "˙",
C = "Ċ", c = "ċ",
E = "Ė", e = "ė",
G = "Ġ", g = "ġ",
I = "İ", i = "i", ["ı"] = "i", ["\\i"] = "i",
Z = "Ż", z = "ż",
},
["="] = { [""] = "¯",
A = "Ā", a = "ā",
E = "Ē", e = "ē",
I = "Ī", i = "ī", ["ı"] = "ī", ["\\i"] = "ī",
O = "Ō", o = "ō",
U = "Ū", u = "ū",
},
["H"] = { [""] = "˝",
O = "Ő", o = "ő",
U = "Ű", u = "ű",
},
["^"] = { [""] = "ˆ",
A = "Â", a = "â",
C = "Ĉ", c = "ĉ",
E = "Ê", e = "ê",
G = "Ĝ", g = "ĝ",
H = "Ĥ", h = "ĥ",
I = "Î", i = "î", ["ı"] = "î", ["\\i"] = "î",
J = "Ĵ", j = "ĵ",
O = "Ô", o = "ô",
S = "Ŝ", s = "ŝ",
U = "Û", u = "û",
W = "Ŵ", w = "ŵ",
Y = "Ŷ", y = "ŷ",
},
["`"] = { [""] = "`",
A = "À", a = "à",
E = "È", e = "è",
I = "Ì", i = "ì", ["ı"] = "ì", ["\\i"] = "ì",
O = "Ò", o = "ò",
U = "Ù", u = "ù",
Y = "Ỳ", y = "ỳ",
},
["c"] = { [""] = "¸",
C = "Ç", c = "ç",
K = "Ķ", k = "ķ",
L = "Ļ", l = "ļ",
N = "Ņ", n = "ņ",
R = "Ŗ", r = "ŗ",
S = "Ş", s = "ş",
T = "Ţ", t = "ţ",
},
["k"] = { [""] = "˛",
A = "Ą", a = "ą",
E = "Ę", e = "ę",
I = "Į", i = "į",
U = "Ų", u = "ų",
},
["r"] = { [""] = "˚",
A = "Å", a = "å",
U = "Ů", u = "ů",
},
["u"] = { [""] = "˘",
A = "Ă", a = "ă",
E = "Ĕ", e = "ĕ",
G = "Ğ", g = "ğ",
I = "Ĭ", i = "ĭ", ["ı"] = "ĭ", ["\\i"] = "ĭ",
O = "Ŏ", o = "ŏ",
U = "Ŭ", u = "ŭ",
},
["v"] = { [""] = "ˇ",
C = "Č", c = "č",
D = "Ď", d = "ď",
E = "Ě", e = "ě",
L = "Ľ", l = "ľ",
N = "Ň", n = "ň",
R = "Ř", r = "ř",
S = "Š", s = "š",
T = "Ť", t = "ť",
Z = "Ž", z = "ž",
},
["~"] = { [""] = "˜",
A = "Ã", a = "ã",
I = "Ĩ", i = "ĩ", ["ı"] = "ĩ", ["\\i"] = "ĩ",
N = "Ñ", n = "ñ",
O = "Õ", o = "õ",
U = "Ũ", u = "ũ",
},
}
texcharacters.accentmapping = accentmapping
-- Maps an accent identifier to a combining mark (incomplete). In this file the table
-- is only referenced from code that has been commented out; the comment-only lines
-- below list combining marks (each followed by an example) that have no entry yet.
local accent_map = allocate { -- incomplete
['~'] = "̃" , -- ̃ Ẽ
['"'] = "̈" , -- ̈ Ë
["`"] = "̀" , -- ̀ È
["'"] = "́" , -- ́ É
["^"] = "̂" , -- ̂ Ê
-- ̄ Ē
-- ̆ Ĕ
-- ̇ Ė
-- ̉ Ẻ
-- ̌ Ě
-- ̏ Ȅ
-- ̑ Ȇ
-- ̣ Ẹ
-- ̧ Ȩ
-- ̨ Ę
-- ̭ Ḙ
-- ̰ Ḛ
}
-- local accents = concat(table.keys(accentmapping)) -- was _map
-- Resolves accent 'a' applied to character 'c'. When accentmapping knows the
-- combination the accented character is returned. Otherwise the TeX input is
-- reconstructed: "\a{c}" when 'braced' is set, else "\a c". (A fallback that appends
-- a combining mark taken from accent_map is deliberately not active.)
local function remap_accent(a,c,braced)
    local group  = accentmapping[a]
    local mapped = group and group[c]
    if mapped then
        return mapped
    end
    local command = "\\" .. a
    if braced then -- or #c > 0
        return command .. "{" .. c .. "}"
    end
    return command .. " " .. c
end
-- Maps a command name (without backslash) to the utf string it stands for. The
-- table is exported as texcharacters.commandmapping and is used by the converters
-- and by texcharacters.defineaccents further down.
local commandmapping = allocate {
["aa"] = "å", ["AA"] = "Å",
["ae"] = "æ", ["AE"] = "Æ",
["cc"] = "ç", ["CC"] = "Ç",
["i"] = "ı", ["j"] = "ȷ",
-- NOTE(review): these two map to plain two letter strings; presumably the ij
-- ligature characters were meant here -- verify against the upstream file
["ij"] = "ij", ["IJ"] = "IJ",
["l"] = "ł", ["L"] = "Ł",
["o"] = "ø", ["O"] = "Ø",
["oe"] = "œ", ["OE"] = "Œ",
-- NOTE(review): "SS" yields the lowercase sharp s while "SZ" yields two letters;
-- the trailing remark names the uppercase sharp s as an alternative -- confirm intent
["sz"] = "ß", ["SZ"] = "SZ", ["ss"] = "ß", ["SS"] = "ß", -- uppercase: ẞ
}
texcharacters.commandmapping = commandmapping
-- Maps the ascii input ligatures for quotes and dashes to their utf counterparts.
-- The table stays local; its entries are merged into the hashes that the converters
-- below build.
local ligaturemapping = allocate {
["''"] = "”",
["``"] = "“",
["--"] = "–",
["---"] = "—",
}
-- Older accent handling code can be found in char-def.lua but in the meantime
-- we moved on. First the one with commands:
local untex, pattern

-- Returns the lpeg pattern that recognizes backslashed accent and command input plus
-- the ligatures, and replaces a match by its utf counterpart. The pattern is built on
-- first use and then kept in 'untex'.
--
-- For every accent/base pair four spellings are registered. When the accent is an
-- ascii letter a space separates it from the base ("\c c"), otherwise the base
-- follows directly ("\'e"); both come with and without surrounding braces, and the
-- "\x{y}" form is added bare and braced too. Commands are registered as "\name "
-- (trailing space), "{\name}" and "{\name }".
local function toutfpattern()
    if untex then
        return untex
    end
    local hash = { }
    for accent, group in next, accentmapping do
        local command  = "\\" .. accent
        local isletter = (accent >= "a" and accent <= "z") or (accent >= "A" and accent <= "Z")
        local prefix   = isletter and (command .. " ") or command
        for base, result in next, group do
            hash[prefix .. base]                        = result
            hash["{" .. prefix .. base .. "}"]          = result
            hash[command .. "{" .. base .. "}"]         = result
            hash["{" .. command .. "{" .. base .. "}}"] = result
        end
    end
    for name, result in next, commandmapping do
        local command = "\\" .. name
        hash[command .. " "]         = result
        hash["{" .. command .. "}"]  = result
        hash["{" .. command .. " }"] = result
    end
    for ligature, result in next, ligaturemapping do
        hash[ligature] = result
    end
    untex = utfchartabletopattern(hash) / hash
    return untex
end
-- Builds the substitution pattern used by textoutf: known sequences are replaced,
-- anything else is copied one character at a time. The result is kept in 'pattern'.
local function prepare()
    local known = toutfpattern()
    pattern = Cs((known + P(1))^0)
    return pattern
end
-- Converts backslashed accents and commands (and ligatures) in str to utf. A string
-- that is empty or has no backslash at all is returned untouched, so in that case
-- ligatures are not converted either. The 'strip' argument is accepted but not used.
local function textoutf(str,strip)
    if str ~= "" and find(str,"\\",1,true) then
        return lpegmatch(pattern or prepare(),str)
    end
    return str
end

texcharacters.toutfpattern = toutfpattern
texcharacters.toutf        = textoutf
-- print(texcharacters.toutf([[\~{Z}]],true))
-- print(texcharacters.toutf([[\'\i]],true))
-- print(texcharacters.toutf([[\'{\i}]],true))
-- print(texcharacters.toutf([[\"{e}]],true))
-- print(texcharacters.toutf([[\" {e}]],true))
-- print(texcharacters.toutf([[{\"{e}}]],true))
-- print(texcharacters.toutf([[{\" {e}}]],true))
-- print(texcharacters.toutf([[{\l}]],true))
-- print(texcharacters.toutf([[{\l }]],true))
-- print(texcharacters.toutf([[\v{r}]],true))
-- print(texcharacters.toutf([[fo{\"o}{\ss}ar]],true))
-- print(texcharacters.toutf([[H{\'a}n Th\^e\llap{\raise 0.5ex\hbox{\'{\relax}}} Th{\'a}nh]],true))
-- Next the ones without backslash
local untex, pattern

-- The variant without backslashes: the keys are the accent identifier directly
-- followed by the base character, the bare command names and the ligatures. The
-- pattern is built on first use and then kept in (this second) 'untex'.
local function toutfpattern()
    if untex then
        return untex
    end
    local hash = { }
    for accent, group in next, accentmapping do
        for base, result in next, group do
            hash[accent .. base] = result
        end
    end
    -- these two are taken over as they are, commands first
    local verbatim = { commandmapping, ligaturemapping }
    for i=1,#verbatim do
        for key, result in next, verbatim[i] do
            hash[key] = result
        end
    end
    untex = utfchartabletopattern(hash) / hash
    return untex
end
-- Builds the substitution pattern for the backslash-less converter: known sequences
-- are replaced, anything else is copied one character at a time.
local function prepare()
    local known = toutfpattern()
    pattern = Cs((known + P(1))^0)
    return pattern
end
-- Runs str through the backslash-less converter; the pattern is created on demand.
local function textoutf(str)
    local converter = pattern or prepare()
    return lpegmatch(converter,str)
end

texcharacters.strtoutfpattern = toutfpattern
texcharacters.strtextoutf     = textoutf
local collapse = utffilters.collapse
local combine = utffilters.combine
if not interfaces then return end
local implement = interfaces.implement
-- Holds the lpeg pattern that the prepare function following this block creates.
local pattern
-- Maps verbose mark names (like "double acute") to a utf character. When
-- characters.verbosemarks already exists it is only passed to mark(), otherwise the
-- table is created here, stored in characters.verbosemarks and, when the storage
-- table is present, registered with it.
local verbosemarks = characters.verbosemarks
if verbosemarks then
mark(verbosemarks)
else
verbosemarks = allocate {
["stroke"] = utfchar(0x02F), ["slash"] = utfchar(0x02F),
["middle dot"] = utfchar(0x0B7),
["grave"] = utfchar(0x300),
["acute"] = utfchar(0x301),
["circumflex"] = utfchar(0x302),
["tilde"] = utfchar(0x303),
["macron"] = utfchar(0x304), ["line"] = utfchar(0x304),
["overline"] = utfchar(0x305),
["breve"] = utfchar(0x306),
["dot"] = utfchar(0x307),
["dieresis"] = utfchar(0x308), ["diaeresis"] = utfchar(0x308),
["hook"] = utfchar(0x309),
["ring"] = utfchar(0x30A),
["double acute"] = utfchar(0x30B), ["hungarumlaut"] = utfchar(0x30B), -- tex speak
["caron"] = utfchar(0x30C),
["vertical line"] = utfchar(0x30D),
["double vertical line"] = utfchar(0x30E),
["double grave"] = utfchar(0x30F),
["inverted breve"] = utfchar(0x311),
["dot below"] = utfchar(0x323),
["ring below"] = utfchar(0x325),
-- both names share one slot
["cedilla"] = utfchar(0x327), ["comma below"] = utfchar(0x327),
["ogonek"] = utfchar(0x328),
["caron below"] = utfchar(0x32C),
["circumflex below"] = utfchar(0x32D),
["tilde below"] = utfchar(0x330),
["macron below"] = utfchar(0x331), ["line below"] = utfchar(0x331),
-- NOTE(review): this slot is far away from the 0x323-0x331 range used by the other
-- "below" marks; it looks suspicious -- confirm the intended code point
["hook below"] = utfchar(0x1FA9D),
}
characters.verbosemarks = verbosemarks
if storage then
storage.register("characters/verbosemarks", verbosemarks, "characters.verbosemarks")
end
end
-- Builds the pattern that normalizes a verbose request: verbose mark names are
-- replaced by their utf character, spaces are dropped and any other utf character
-- is copied. The result is kept in 'pattern'.
local function prepare()
    local p_mark  = utfchartabletopattern(verbosemarks) / verbosemarks
    local p_space = lpegpatterns.space / ""
    pattern = Cs((p_mark + p_space + lpegpatterns.utf8character)^0)
    return pattern
end
-- Cache that resolves an 'old school' character specification to a utf sequence.
-- A missing key is first normalized (verbose mark names replaced, spaces removed)
-- and then tried in this order, stopping at the first step that changes it:
--
--   1. collapse        (char specials)
--   2. combine         (with specials)
--   3. commandmapping  (reported with a leading backslash)
--   4. textoutf        (reported with a leading backslash)
--
-- The outcome is reported and stored under the normalized key as well as under the
-- key that was asked for. Without the latter every request that normalization
-- changes (like "e acute") would be resolved and reported again on each use.
local hash = table.setmetatableindex(function(t,k)
    local asked  = k
    local prefix = ""
    k = lpegmatch(pattern or prepare(),k) or k
    local v = collapse(k) or k -- char specials
    if k == v then
        v = combine(k) or k -- with specials
    end
    if k == v then
        v = commandmapping[k] or k
        if k == v then
            v = textoutf(k) or k
        end
        if k ~= v then
            -- resolved as a command or tex sequence
            prefix = "\\"
        end
    end
    report_defining("instead of old school '%s%s' you can input the utf sequence %s",prefix,k,v)
    t[k] = v
    if asked ~= k then
        t[asked] = v
    end
    return v
end)
-- Public command "chr": looks up its argument in the cache above and passes the
-- result to context.
implement {
    name      = "chr",
    public    = true,
    arguments = "argument",
    actions   = function(str)
        local resolved = hash[str]
        context(resolved) -- expandable
    end
}
-- Returns "\<contextname>" when the character data has a contextname for slot n,
-- and otherwise the utf character itself (was characters.safechar).
function texcharacters.safechar(n)
    local chr  = data[n]
    local name = chr and chr.contextname
    if name then
        return "\\" .. name
    end
    return utfchar(n)
end
if not context or not commands then
-- used in e.g. mtx-bibtex
return
end
-- all kind of initializations
local tex = tex
local texsetlccode = tex.setlccode
local texsetsfcode = tex.setsfcode
local texsetcatcode = tex.setcatcode
local contextsprint = context.sprint
local ctxcatcodes = catcodes.numbers.ctxcatcodes
local texsetmacro = tokens.setters.macro
local texsetchar = tokens.setters.char
-- function texcharacters.defineaccents()
-- local ctx_dodefineaccentcommand = context.dodefineaccent
-- local ctx_dodefineaccent = context.dodefineaccent
-- local ctx_dodefinecommand = context.dodefinecommand
-- for accent, group in next, accentmapping do
-- ctx_dodefineaccentcommand(accent)
-- for character, mapping in next, group do
-- ctx_dodefineaccent(accent,character,mapping)
-- end
-- end
-- for command, mapping in next, commandmapping do
-- ctx_dodefinecommand(command,mapping)
-- end
-- os.exit()
-- end
-- Calls context.dodefinecombine for every verbose mark (with the spaces removed
-- from its name) and context.dodefinecommand for every entry of commandmapping.
function texcharacters.defineaccents()
    local definecombine = context.dodefinecombine
    local definecommand = context.dodefinecommand
    for verbose, combining in next, verbosemarks do
        local name = gsub(verbose," ","") -- only the string, not the count
        definecombine(name,combining)
    end
    for command, mapping in next, commandmapping do
        definecommand(command,mapping)
    end
end

implement { -- a waste of scanner but consistent
    actions = texcharacters.defineaccents,
    name    = "defineaccents",
}
--[[ldx--
Instead of using a TeX file to define the named glyphs, we
use the character data table. After all, we have this information available anyway.
--ldx]]--
-- Converts a string to a number. Accepted are anything that tonumber understands
-- and the TeX hexadecimal notation, a double quote followed by hex digits ("41).
-- Everything else results in 0. The quote prefix is checked explicitly because
-- tonumber raises an error when it gets nil together with a base, which would make
-- the fallback to 0 unreachable for strings like "abc".
local function to_number(s)
    local n = tonumber(s)
    if n then
        return n
    end
    local hex = match(s,'^"(.*)$')
    return hex and tonumber(hex,16) or 0
end
-- "utfchar": string -> number -> utf character -> context.sprint
implement {
    name      = "utfchar",
    arguments = "string",
    actions   = { to_number, utfchar, contextsprint },
}

-- "safechar": string -> number -> texcharacters.safechar -> context.sprint
implement {
    name      = "safechar",
    arguments = "string",
    actions   = { to_number, texcharacters.safechar, contextsprint },
}

-- "uchar": two integers h and l that are combined into slot h*256+l
implement {
    name      = "uchar",
    arguments = { "integer", "integer" },
    actions   = function(h,l)
        local slot = h*256 + l
        context(utfchar(slot))
    end
}

tex.uprint = commands.utfchar
-- in context we don't use lc and uc codes (in fact in luatex we should have a hf code)
-- so at some point we might drop this
-- The following get set at the TeX end:
-- Set of code points (spaces and format characters, see the category and name that
-- follow each entry) for which no named macro is defined by the loops below: a
-- command category character is only handled when its slot is not in this set. The
-- commented entries are currently not excluded.
local forbidden = tohash {
0x000A0, -- zs nobreakspace
0x000AD, -- cf softhyphen
-- 0x00600, -- cf arabicnumber
-- 0x00601, -- cf arabicsanah
-- 0x00602, -- cf arabicfootnotemarker
-- 0x00603, -- cf arabicsafha
-- 0x00604, -- cf arabicsamvat
-- 0x00605, -- cf arabicnumberabove
-- 0x0061C, -- cf arabiclettermark
-- 0x006DD, -- cf arabicendofayah
-- 0x008E2, -- cf arabicdisputedendofayah
0x02000, -- zs enquad
0x02001, -- zs emquad
0x02002, -- zs enspace \kern .5\emwidth
0x02003, -- zs emspace \hskip \emwidth
0x02004, -- zs threeperemspace
0x02005, -- zs fourperemspace
0x02006, -- zs sixperemspace
0x02007, -- zs figurespace
0x02008, -- zs punctuationspace
0x02009, -- zs breakablethinspace
0x0200A, -- zs hairspace
0x0200B, -- cf zerowidthspace
0x0200C, -- cf zwnj
0x0200D, -- cf zwj
0x0202F, -- zs narrownobreakspace
0x0205F, -- zs medspace \textormathspace +\medmuskip 2
-- 0x03000, -- zs ideographicspace
-- 0x0FEFF, -- cf zerowidthnobreakspace \penalty \plustenthousand \kern \zeropoint
}
-- State shared by the initialization code below. The csletters table doubles as a
-- signal: when characters.csletters already exists the setup is not done again.
--
-- The names used here have to match the ones used in the functions that follow:
-- the space factor state is 'sfstate' and the blocks flag is 'blocks_too'. A
-- mismatch silently turns them into globals, which disables the directive and the
-- default "traditional" mode.

local csletters  = characters.csletters -- also a signal that we have initialized
local activated  = { }
local sfstate    = "unset" -- unset, traditional, normal
local blocks_too = false

directives.register("characters.blockstoo",function(v) blocks_too = v end)

-- If this is something that is not documentwide and used a lot, then we
-- need a more clever approach (trivial but not now).

-- Remembers the requested mode v. Unless nothing was set up yet (state "unset"),
-- every character of category "lu" gets space factor n right away.
local function setuppersfcodes(v,n)
    if sfstate ~= "unset" then
        report_defining("setting uppercase sf codes to %a",n)
        for u, chr in next, data do
            if chr.category == "lu" then
                texsetsfcode(u,n)
            end
        end
    end
    sfstate = v
end

directives.register("characters.spaceafteruppercase",function(v)
    if v == "traditional" then
        setuppersfcodes(v,999)
    elseif v == "normal" then
        setuppersfcodes(v,1000)
    end
end)

if not csletters then

    csletters            = allocate()
    characters.csletters = csletters

    report_defining("setting up character related codes and commands")

    if sfstate == "unset" then
        sfstate = "traditional"
    end

    local traditional = sfstate == "traditional"

    -- Sets the lowercase and uppercase codes of letter u. A missing code is set to
    -- the character itself (also in the data), a code that is a table is replaced
    -- by the character itself for the engine only. In traditional mode uppercase
    -- letters get space factor 999.
    local function setlettercodes(u,chr,category)
        local lc = chr.lccode
        local uc = chr.uccode
        if not lc then
            chr.lccode = u
            lc = u
        elseif type(lc) == "table" then
            lc = u
        end
        if not uc then
            chr.uccode = u
            uc = u
        elseif type(uc) == "table" then
            uc = u
        end
        texsetlccode(u,lc,uc)
        if traditional and category == "lu" then
            texsetsfcode(u,999)
        end
    end

    for u, chr in next, data do -- will move up
        local contextname = chr.contextname
        local category    = chr.category
        local isletter    = is_letter[category]
        if contextname then
            if is_character[category] then
                if chr.unicodeslot < 128 then
                    if isletter then
                        csletters[utfchar(u)] = u
                    end
                elseif isletter and u >= 32 and u <= 65536 then
                    csletters[utfchar(u)] = u
                end
                if isletter then
                    setlettercodes(u,chr,category)
                end
            elseif is_command[category] and not forbidden[u] then
                -- skip
            elseif is_mark[category] then
                texsetlccode(u,u,u) -- for hyphenation
            end
        elseif isletter then
            csletters[utfchar(u)] = u
            setlettercodes(u,chr,category)
        elseif is_mark[category] then
            texsetlccode(u,u,u) -- for hyphenation
        end
    end

    if blocks_too then
        -- this slows down format generation by over 10 percent
        for k, v in next, blocks do
            if v.catcode == "letter" then
                local first = v.first
                local last  = v.last
                local gaps  = v.gaps
                -- only csletters is extended here, the lc codes are left alone
                if first and last then
                    for u=first,last do
                        csletters[utfchar(u)] = u
                    end
                end
                if gaps then
                    for i=1,#gaps do
                        local u = gaps[i]
                        csletters[utfchar(u)] = u
                    end
                end
            end
        end
    end

    if storage then
        storage.register("characters/csletters", csletters, "characters.csletters")
    end

    -- Defines the named characters: everything with a contextname becomes an
    -- immutable macro that expands to the utf character, except for non-letter
    -- characters below slot 128 which are defined as immutable chars. Characters
    -- of a command category are skipped when they are in the forbidden set. The
    -- argument is not used.
    function characters.setcharacternames(ctt)
        for u, chr in next, data do -- will move up
            local contextname = chr.contextname
            if contextname then
                local category = chr.category
                if is_character[category] then
                    if chr.unicodeslot < 128 and not is_letter[category] then
                        texsetchar(contextname,u,"immutable")
                    else
                        texsetmacro(contextname,utfchar(u),"immutable")
                    end
                elseif is_command[category] and not forbidden[u] then
                    texsetmacro(contextname,utfchar(u),"immutable")
                end
            end
        end
    end

else
    mark(csletters)
end
-- A pattern that matches any of the characters collected in csletters.
lpegpatterns.csletter = utfchartabletopattern(csletters)

-- The engine presets the letters to 11 (always).

-- Gives the non-joiner, the joiner and every slot in csletters catcode 11 in
-- catcode table cct. The current catcode table is restored afterwards.
function characters.setlettercatcodes(cct)
    if trace_defining then
        report_defining("assigning letter catcodes to catcode table %a",cct)
    end
    local previous = tex.catcodetable
    tex.catcodetable = cct
    texsetcatcode(0x200C,11) -- non-joiner
    texsetcatcode(0x200D,11) -- joiner
    for _, slot in next, csletters do
        texsetcatcode(slot,11)
    end
    tex.catcodetable = previous
end
-- Gives the slots 65-90 and 97-122 (the ascii letters) catcode 12 in catcode table
-- cct. The current catcode table is restored afterwards.
function characters.setothercatcodes(cct)
    if trace_defining then
        report_defining("assigning other catcodes to catcode table %a",cct)
    end
    local previous = tex.catcodetable
    tex.catcodetable = cct
    local ranges = { { 65, 90 }, { 97, 122 } }
    for i=1,#ranges do
        local range = ranges[i]
        for slot=range[1],range[2] do
            texsetcatcode(slot,12)
        end
    end
    tex.catcodetable = previous
end
-- Gives every slot listed in 'activated' catcode 13 in catcode table cct and
-- reports each one when tracing is on. The current catcode table is restored
-- afterwards.
function characters.setactivecatcodes(cct)
    local previous = tex.catcodetable
    tex.catcodetable = cct
    local count = #activated
    for index=1,count do
        local slot = activated[index]
        texsetcatcode(slot,13)
        if trace_defining then
            report_defining("character %U (%s) is active in set %a",slot,data[slot].description,cct)
        end
    end
    tex.catcodetable = previous
end
--[[ldx--
Setting the lccodes is also done in a loop over the data table.
--ldx]]--
-- "chardescription": passes the description of the given slot to context; nothing
-- happens when there is no data for that slot.
implement {
    name      = "chardescription",
    arguments = "integer",
    actions   = function(slot)
        local entry = data[slot]
        if not entry then
            return
        end
        context(entry.description)
    end,
}
-- The catcode and name setters become private commands that take one integer, each
-- named after the function in the characters table that it calls.
if characters.setcharacternames then -- only in ini mode
    local names = {
        "setlettercatcodes",
        "setothercatcodes",
        "setactivecatcodes",
        "setcharacternames",
    }
    for i=1,#names do
        local name = names[i]
        implement {
            name      = name,
            scope     = "private",
            actions   = characters[name],
            arguments = "integer",
        }
    end
end
-- experiment (some can move to char-ini.lua)
-- Overloads a case mapping of a character.
--
--   c     : the slot of the character, something that tonumber accepts
--   u     : a settings list (parsed by utilities.parsers.settings_to_array) with one
--           or more numbers; one number is stored as number, more as a table
--   code  : the field in the character data that gets set ("uccode" or "lccode")
--   codes : the name of the table in characters whose entry for c is reset
--
-- Nothing happens when c is not a number, when the list is empty or has no valid
-- number in it, or when there is no data for c. The last two cases used to store an
-- empty table and to raise an error on indexing nil respectively.
local function overload(c,u,code,codes)
    local slot = tonumber(c)
    if not slot then
        return
    end
    local list = utilities.parsers.settings_to_array(u)
    local n = #list
    if n == 0 then
        return
    end
    local target = nil
    if n == 1 then
        target = tonumber(list[1])
    else
        target = { }
        for i=1,n do
            target[#target+1] = tonumber(list[i]) -- invalid entries are skipped
        end
        if #target == 0 then
            target = nil
        end
    end
    if not target then
        return
    end
    local entry = data[slot]
    if not entry then
        return
    end
    entry[code] = target
    local cache = characters[codes]
    if cache then
        cache[slot] = nil
    end
end
-- "overloaduppercase": sets the uccode of a character and resets its uccodes entry.
implement {
    name      = "overloaduppercase",
    arguments = "2 strings",
    actions   = function(slot,targets)
        overload(slot,targets,"uccode","uccodes")
    end
}

-- "overloadlowercase": sets the lccode of a character and resets its lccodes entry.
implement {
    name      = "overloadlowercase",
    arguments = "2 strings",
    actions   = function(slot,targets)
        overload(slot,targets,"lccode","lccodes")
    end
}
-- Just for fun we support keywords:
--
-- \startTEXpage[offset=10pt]
-- abg"
-- \sl \showboxes
-- \accent `" h%
-- \accent `" x%
-- \accent yoffset .2ex `" x
-- \accent yoffset 1.1ex `x x%
-- \stopTEXpage
--
-- We could do this:
--
-- \startTEXpage[offset=10pt]
-- abg"
-- \sl \showboxes
-- \withaccent `" h%
-- \withaccent `" x%
-- \withaccent yoffset .2ex `" x
-- \withaccent yoffset 1.1ex accent `x base `x%
-- \stopTEXpage
--
-- But only when users demand it:
--
-- do
--
-- local new_glyph = nodes.pool.glyph
--
-- local scankeyword = tokens.scanners.keyword
-- local scaninteger = tokens.scanners.integer
-- local scandimension = tokens.scanners.dimension
-- local scantoken = tokens.scanners.token
--
-- implement {
-- name = "withaccent",
-- public = true,
-- protected = true,
-- actions = function()
-- local xoffset = 0
-- local yoffset = 0
-- local accent = false
-- local base = false
-- local zwj = 0x200D
-- while true do
-- if scankeyword("xoffset") then
-- xoffset = scandimension()
-- elseif scankeyword("yoffset") then
-- yoffset = scandimension()
-- elseif scankeyword("accent") then
-- accent = scaninteger()
-- elseif scankeyword("base") then
-- base = scaninteger()
-- else
-- break
-- end
-- end
-- if not accent then
-- accent = scaninteger()
-- end
-- if not base then
-- local nxttok = scantoken()
-- base = nxttok.cmdname == "char_number" and scaninteger() or nxttok.index
-- end
-- if base and accent and base > 0 and accent > 0 then
-- base = new_glyph(true,base)
-- zwj = new_glyph(true,zwj)
-- accent = new_glyph(true,accent)
-- local slant = fonts.hashes.parameters[true].slant / 65536 -- a la tex
-- local xheight = fonts.hashes.parameters[true].xheight -- hm, compensated for glyphscale?
-- accent.xoffset = xoffset - .5*(base.width -accent.width) + .5*(base.height-accent.height) * slant
-- accent.yoffset = yoffset - (xheight - accent.height)
-- accent.left = accent.width
-- accent.options = accent.options | 0x40 | 0x80
-- context.dontleavehmode()
-- context(base)
-- context(zwj)
-- context(accent)
-- end
-- end,
-- }
--
-- end