summaryrefslogtreecommitdiff
path: root/tex/context/base
diff options
context:
space:
mode:
authorMarius <mariausol@gmail.com>2010-10-20 14:40:12 +0300
committerMarius <mariausol@gmail.com>2010-10-20 14:40:12 +0300
commitb23c2188805ed9f7bdbdbe11eed957a32e90f5ce (patch)
tree6a6587ef0407bcee4694c3c2af0debd697ed87c6 /tex/context/base
parentf93975efd76053e907d19114d4ba576ae44da134 (diff)
downloadcontext-b23c2188805ed9f7bdbdbe11eed957a32e90f5ce.tar.gz
beta 2010.10.20 13:11
Diffstat (limited to 'tex/context/base')
-rw-r--r--tex/context/base/char-cmp.lua1
-rw-r--r--tex/context/base/char-ini.lua151
-rw-r--r--tex/context/base/cont-new.tex2
-rw-r--r--tex/context/base/context.tex2
-rw-r--r--tex/context/base/font-ini.lua4
-rw-r--r--tex/context/base/font-ota.lua9
-rw-r--r--tex/context/base/font-tfm.lua6
-rw-r--r--tex/context/base/font-vf.lua12
-rw-r--r--tex/context/base/l-table.lua47
-rw-r--r--tex/context/base/mult-cld.lua6
-rw-r--r--tex/context/base/s-fnt-30.tex2
-rw-r--r--tex/context/base/s-reg-01.tex27
12 files changed, 123 insertions, 146 deletions
diff --git a/tex/context/base/char-cmp.lua b/tex/context/base/char-cmp.lua
index be75dc0fd..e522226f8 100644
--- a/tex/context/base/char-cmp.lua
+++ b/tex/context/base/char-cmp.lua
@@ -208,7 +208,6 @@ function characters.uncompose(n) -- n == string|number, returns string
else
cdn = characters.data[n]
end
- -- return characters.shape(n)
if cdn then
local shcode = cdn.shcode
if not shcode then
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua
index b3ac6fc22..f86eeaf66 100644
--- a/tex/context/base/char-ini.lua
+++ b/tex/context/base/char-ini.lua
@@ -11,10 +11,11 @@ local utf = unicode.utf8
local utfchar, utfbyte, utfvalues = utf.char, utf.byte, string.utfvalues
local concat, unpack = table.concat, table.unpack
-local next, tonumber, type = next, tonumber, type
+local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset
local texsprint, texprint = tex.sprint, tex.print
local format, lower, gsub, match, gmatch = string.format, string.lower, string.gsub, string.match, string.match, string.gmatch
local texsetlccode, texsetuccode, texsetsfcode, texsetcatcode = tex.setlccode, tex.setuccode, tex.setsfcode, tex.setcatcode
+local P, R, lpegmatch = lpeg.P, lpeg.R, lpeg.match
local allocate, mark = utilities.storage.allocate, utilities.storage.mark
@@ -58,7 +59,36 @@ storage.register("characters/ranges",characters.ranges,"characters.ranges")
local ranges = characters.ranges
+--[[ldx--
+<p>This converts a string (if given) into a number.</p>
+--ldx]]--
+
+local pattern = (P("0x") + P("U+")) * ((R("09","AF")^1 * P(-1)) / function(s) return tonumber(s,16) end)
+
+lpeg.patterns.chartonumber = pattern
+
+local function chartonumber(k)
+ return type(k) == "string" and (lpegmatch(pattern,k) or utfbyte(k)) or k
+end
+
+--~ print(chartonumber(97), chartonumber("a"), chartonumber("0x61"), chartonumber("U+61"))
+
+characters.tonumber = chartonumber
+
setmetatablekey(data, "__index", function(t,k)
+ if type(k) == "string" then
+ k = lpegmatch(pattern,k) or utfbyte(k)
+ if k then
+ local tk = rawget(t,k)
+ if tk then
+ return tk
+ else
+ -- goes to ranges
+ end
+ else
+ return nil
+ end
+ end
for r=1,#ranges do
local rr = ranges[r] -- first in range
if k > rr and k <= data[rr].range then
@@ -392,10 +422,15 @@ use the table. After all, we have this information available anyway.</p>
function characters.makeactive(n,name) -- let ?
texsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name))
+ -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
end
-function tex.uprint(n)
- texsprint(ctxcatcodes,utfchar(n))
+function tex.uprint(c,n)
+ if n then
+ texsprint(c,utfchar(n))
+ else
+ texsprint(utfchar(c))
+ end
end
if texsetcatcode then
@@ -478,7 +513,7 @@ if texsetcatcode then
end
-else -- keep this
+else -- char-obs
local template_a = "\\startextendcatcodetable{%s}\\chardef\\l=11\\chardef\\a=13\\let\\c\\catcode%s\\let\\a\\undefined\\let\\l\\undefined\\let\\c\\undefined\\stopextendcatcodetable"
local template_b = "\\chardef\\l=11\\chardef\\a=13\\let\\c\\catcode%s\\let\\a\\undefined\\let\\l\\undefined\\let\\c\\undefined"
@@ -595,7 +630,7 @@ if texsetcatcode then
end
end
-else -- keep this one
+else -- char-obs
function characters.setcodes()
for code, chr in next, data do
@@ -623,69 +658,16 @@ of the official <l n='api'/>.</p>
--ldx]]--
--[[ldx--
-<p>This converts a string (if given) into a number.</p>
---ldx]]--
-
-function characters.number(n)
- if type(n) == "string" then return tonumber(n,16) else return n end
-end
-
---[[ldx--
-<p>Checking for valid characters.</p>
+<p>A couple of convenience methods. Beware, these are slower than directly
+accessing the data table.</p>
--ldx]]--
-function characters.is_valid(s)
- return s or ""
-end
-
-function characters.checked(s, default)
- return s or default
-end
-
-characters.valid = characters.is_valid
-
---[[ldx--
-<p></p>
---ldx]]--
--- set a table entry; index is number (can be different from unicodeslot)
+-- we could make them virtual: characters.contextnames[n]
-function characters.set(n, c)
- data[characters.number(n)] = c
-end
-
---[[ldx--
-<p>Get a table entry happens by number. Keep in mind that the unicodeslot
-can be different (not likely).</p>
---ldx]]--
-
-function characters.get(n)
- return data[characters.number(n)]
-end
-
---[[ldx--
-<p>A couple of convenience methods. Beware, these are not that fast due
-to the checking.</p>
---ldx]]--
-
-function characters.hexindex(n)
- return format("%04X", characters.valid(data[characters.number(n)].unicodeslot))
-end
-
-function characters.contextname(n)
- return characters.valid(data[characters.number(n)].contextname)
-end
-
-function characters.adobename(n)
- return characters.valid(data[characters.number(n)].adobename)
-end
-
-function characters.description(n)
- return characters.valid(data[characters.number(n)].description)
-end
-
-function characters.category(n)
- return characters.valid(data[characters.number(n)].category)
-end
+function characters.contextname(n) return data[n].contextname or "" end
+function characters.adobename (n) return data[n].adobename or "" end
+function characters.description(n) return data[n].description or "" end
+function characters.category (n) return data[n].category or "" end
--[[ldx--
<p>Requesting lower and uppercase codes:</p>
@@ -719,28 +701,6 @@ function characters.shape(n)
end
end
---[[ldx--
-<p>Categories play an important role, so here are some checkers.</p>
---ldx]]--
-
-function characters.is_of_category(token,category)
- if type(token) == "string" then
- return data[utfbyte(token)].category == category
- else
- return data[token].category == category
- end
-end
-
-function characters.i_is_of_category(i,category) -- by index (number)
- local cd = data[i]
- return cd and cd.category == category
-end
-
-function characters.n_is_of_category(n,category) -- by name (string)
- local cd = data[utfbyte(n)]
- return cd and cd.category == category
-end
-
-- xml support (moved)
function characters.remapentity(chr,slot)
@@ -779,13 +739,17 @@ end
utf.string = utf.string or utfstring
-characters.lccodes = allocate() local lccodes = characters.lccodes -- lazy table
-characters.uccodes = allocate() local uccodes = characters.uccodes -- lazy table
-characters.shcodes = allocate() local shcodes = characters.shcodes -- lazy table
+characters.categories = allocate() local categories = characters.categories -- lazy table
-setmetatable(lccodes, { __index = function(t,u) if u then local c = data[u] c = c and c.lccode or u t[u] = c return c end end } )
-setmetatable(uccodes, { __index = function(t,u) if u then local c = data[u] c = c and c.uccode or u t[u] = c return c end end } )
-setmetatable(shcodes, { __index = function(t,u) if u then local c = data[u] c = c and c.shcode or u t[u] = c return c end end } )
+setmetatable(categories, { __index = function(t,u) if u then local c = data[u] c = c and c.category or u t[u] = c return c end end } )
+
+characters.lccodes = allocate() local lccodes = characters.lccodes -- lazy table
+characters.uccodes = allocate() local uccodes = characters.uccodes -- lazy table
+characters.shcodes = allocate() local shcodes = characters.shcodes -- lazy table
+
+setmetatable(lccodes, { __index = function(t,u) if u then local c = data[u] c = c and c.lccode or u t[u] = c return c end end } )
+setmetatable(uccodes, { __index = function(t,u) if u then local c = data[u] c = c and c.uccode or u t[u] = c return c end end } )
+setmetatable(shcodes, { __index = function(t,u) if u then local c = data[u] c = c and c.shcode or u t[u] = c return c end end } )
characters.lcchars = allocate() local lcchars = characters.lcchars -- lazy table
characters.ucchars = allocate() local ucchars = characters.ucchars -- lazy table
@@ -856,7 +820,6 @@ function characters.lettered(str)
return concat(new)
end
-
-- -- some day we might go this route, but it does not really save that much
-- -- so not now (we can generate a lot using mtx-unicode that operates on the
-- -- database)
diff --git a/tex/context/base/cont-new.tex b/tex/context/base/cont-new.tex
index 5744d86a6..b8f5f2dff 100644
--- a/tex/context/base/cont-new.tex
+++ b/tex/context/base/cont-new.tex
@@ -11,7 +11,7 @@
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.
-\newcontextversion{2010.10.19 23:03}
+\newcontextversion{2010.10.20 13:11}
%D This file is loaded at runtime, thereby providing an
%D excellent place for hacks, patches, extensions and new
diff --git a/tex/context/base/context.tex b/tex/context/base/context.tex
index 00a426a14..77f42b1ab 100644
--- a/tex/context/base/context.tex
+++ b/tex/context/base/context.tex
@@ -20,7 +20,7 @@
%D your styles an modules.
\edef\contextformat {\jobname}
-\edef\contextversion{2010.10.19 23:03}
+\edef\contextversion{2010.10.20 13:11}
%D For those who want to use this:
diff --git a/tex/context/base/font-ini.lua b/tex/context/base/font-ini.lua
index fd4465d62..6082c1d1d 100644
--- a/tex/context/base/font-ini.lua
+++ b/tex/context/base/font-ini.lua
@@ -71,6 +71,10 @@ fonts.triggers = fonts.triggers or {
fonts.processors = fonts.processors or {
}
+fonts.analyzers = fonts.analyzers or {
+ useunicodemarks = false,
+}
+
fonts.manipulators = fonts.manipulators or {
}
diff --git a/tex/context/base/font-ota.lua b/tex/context/base/font-ota.lua
index d148eddbf..18b0bf2d8 100644
--- a/tex/context/base/font-ota.lua
+++ b/tex/context/base/font-ota.lua
@@ -42,11 +42,13 @@ local traverse_node_list = node.traverse
local fontdata = fonts.ids
local state = attributes.private('state')
+local categories = characters and characters.categories or { } -- sorry, only in context
local fontscolors = fonts.colors
local fcs = (fontscolors and fontscolors.set) or function() end
local fcr = (fontscolors and fontscolors.reset) or function() end
+
-- in the future we will use language/script attributes instead of the
-- font related value, but then we also need dynamic features which is
-- somewhat slower; and .. we need a chain of them
@@ -161,10 +163,6 @@ local isol_fina_medi_init = {
[0x077E] = true, [0x077F] = true, [zwj] = true,
}
-local mark = {
- [0x0650] = true,
-}
-
local arab_warned = { }
-- todo: gref
@@ -228,6 +226,7 @@ local function finish(first,last)
end
function analyzers.methods.arab(head,font,attr) -- maybe make a special version with no trace
+ local useunicodemarks = analyzers.useunicodemarks
local tfmdata = fontdata[font]
local marks = tfmdata.marks
local first, last, current, done = nil, nil, head, false
@@ -235,7 +234,7 @@ function analyzers.methods.arab(head,font,attr) -- maybe make a special version
if current.id == glyph_code and current.subtype<256 and current.font == font and not has_attribute(current,state) then
done = true
local char = current.char
- if marks[char] or mark[char] then
+ if marks[char] or (useunicodemarks and categories[char] == "mn") then
set_attribute(current,state,5) -- mark
if trace_analyzing then fcs(current,"font:mark") end
elseif isol[char] then -- can be zwj or zwnj too
diff --git a/tex/context/base/font-tfm.lua b/tex/context/base/font-tfm.lua
index 266bc7406..4d80f07e1 100644
--- a/tex/context/base/font-tfm.lua
+++ b/tex/context/base/font-tfm.lua
@@ -661,6 +661,7 @@ analyzers.initializers = analyzers.initializers or { }
local state = attributes.private('state')
function analyzers.aux.setstate(head,font)
+ local useunicodemarks = analyzers.useunicodemarks
local tfmdata = fontdata[font]
local characters = tfmdata.characters
local descriptions = tfmdata.descriptions
@@ -668,9 +669,10 @@ function analyzers.aux.setstate(head,font)
while current do
local id = current.id
if id == glyph_code and current.font == font then
- local d = descriptions[current.char]
+ local char = current.char
+ local d = descriptions[char]
if d then
- if d.class == "mark" then
+ if d.class == "mark" or (useunicodemarks and categories[char] == "mn") then
done = true
set_attribute(current,state,5) -- mark
elseif n == 0 then
diff --git a/tex/context/base/font-vf.lua b/tex/context/base/font-vf.lua
index 89260d2f8..455646a22 100644
--- a/tex/context/base/font-vf.lua
+++ b/tex/context/base/font-vf.lua
@@ -34,6 +34,8 @@ vf.aux = vf.aux or { }
vf.aux.combine = vf.aux.combine or { }
local combine = vf.aux.combine
+local chardata = characters.data
+
function methods.install(tag, rules)
vf.combinations[tag] = rules
variants[tag] = function(specification)
@@ -200,8 +202,8 @@ methods.install(
-- todo: interface tables in back-ini
variants["demo-1"] = function(specification)
- local name = specification.name -- symbolic name
- local size = specification.size -- given size
+ local name = specification.name -- symbolic name
+ local size = specification.size -- given size
local f, id = tfm.readanddefine('lmroman10-regular',size)
if f and id then
local capscale, digscale = 0.85, 0.75
@@ -212,15 +214,15 @@ variants["demo-1"] = function(specification)
{ name = 'lmsans10-regular' , size = size*capscale }, -- forced extra name
{ name = 'lmtypewriter10-regular', size = size*digscale } -- forced extra name
}
- local i_is_of_category = characters.i_is_of_category
local characters, descriptions = f.characters, f.descriptions
local vfspecials = backends.tables.vfspecials
local red, green, blue, black = vfspecials.red, vfspecials.green, vfspecials.blue, vfspecials.black
for u,v in next, characters do
- if u and i_is_of_category(u,'lu') then
+ local category = chardata[u].category
+ if category == 'lu' then
v.width = capscale*v.width
v.commands = { red, { 'slot', 2, u }, black }
- elseif u and i_is_of_category(u,'nd') then
+ elseif category == 'nd' then
v.width = digscale*v.width
v.commands = { blue, { 'slot', 3, u }, black }
else
diff --git a/tex/context/base/l-table.lua b/tex/context/base/l-table.lua
index b9753dd26..b661e7aaa 100644
--- a/tex/context/base/l-table.lua
+++ b/tex/context/base/l-table.lua
@@ -255,28 +255,6 @@ end
table.fastcopy = fastcopy
table.copy = copy
--- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack)
-
-function table.sub(t,i,j)
- return { unpack(t,i,j) }
-end
-
-function table.replace(a,b)
- for k,v in next, b do
- a[k] = v
- end
-end
-
--- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice)
-
-function table.is_empty(t) -- obolete, use inline code instead
- return not t or not next(t)
-end
-
-function table.has_one_entry(t)
- local n = next(t)
- return n and not next(t,n)
-end
function table.tohash(t,value)
local h = { }
@@ -872,3 +850,28 @@ end
function table.print(...)
table.tohandle(print,...)
end
+
+-- -- -- obsolete but we keep them for a while and will comment them later -- -- --
+
+-- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack)
+
+function table.sub(t,i,j)
+ return { unpack(t,i,j) }
+end
+
+-- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice)
+
+function table.is_empty(t)
+ return not t or not next(t)
+end
+
+function table.has_one_entry(t)
+ local n = next(t)
+ return n and not next(t,n)
+end
+
+function table.replace(a,b)
+ for k,v in next, b do
+ a[k] = v
+ end
+end
diff --git a/tex/context/base/mult-cld.lua b/tex/context/base/mult-cld.lua
index 2ff0568c1..373300d69 100644
--- a/tex/context/base/mult-cld.lua
+++ b/tex/context/base/mult-cld.lua
@@ -345,6 +345,8 @@ function context.direct(first,...)
end
end
+-- todo: use flush directly
+
function context.char(k) -- todo: if catcode == letter or other then just the utf
if type(k) == "table" then
for i=1,#k do
@@ -355,6 +357,10 @@ function context.char(k) -- todo: if catcode == letter or other then just the ut
end
end
+function context.utfchar(k)
+ context(utfchar(k))
+end
+
function context.chardef(cs,u)
context(format([[\chardef\%s=%s\relax]],k))
end
diff --git a/tex/context/base/s-fnt-30.tex b/tex/context/base/s-fnt-30.tex
index 1daf6806e..d6d298011 100644
--- a/tex/context/base/s-fnt-30.tex
+++ b/tex/context/base/s-fnt-30.tex
@@ -19,7 +19,7 @@ function document.show_character_data(n)
if d then
local function entry(label,name)
NC() context(label)
- NC() context(characters.valid(d[name])
+ NC() context(d[name])
NC() NR()
end
context.starttabulate { "|Tl|Tl|]" }
diff --git a/tex/context/base/s-reg-01.tex b/tex/context/base/s-reg-01.tex
index 5ed86cb4e..d3e456881 100644
--- a/tex/context/base/s-reg-01.tex
+++ b/tex/context/base/s-reg-01.tex
@@ -26,22 +26,21 @@ function regimes.show(regime)
regime = regimes.synonyms[regime] or regime
local r = regimes.data[regime]
if r then
+ local chardata = characters.data
context.starttabulate { "|rT|T|rT|lT|lT|lT|" }
for k, v in ipairs(r) do
- context.NC()
- context(k)
- context.NC()
- context.getvalue(characters.contextname(v))
- context.NC()
- context(characters.hexindex(v))
- context.NC()
- context(characters.contextname(v))
- context.NC()
- context(characters.category(v))
- context.NC()
- context(characters.description(v))
- context.NC()
- context.NR()
+ local chr = chardata[v]
+ if chr then
+ context.NC() context(k)
+ context.NC() context.getvalue(chr.contextname])
+ context.NC() context("%U+05X",v)
+ context.NC() context(chr.contextname)
+ context.NC() context(chr.category)
+ context.NC() context(chr.description)
+ context.NC() context.NR()
+ else
+ -- can't happen
+ end
end
context.stoptabulate()
else