beta 2010.10.20 13:11

author: Marius <mariausol@gmail.com> 2010-10-20 14:40:12 +0300
committer: Marius <mariausol@gmail.com> 2010-10-20 14:40:12 +0300
commit: b23c2188805ed9f7bdbdbe11eed957a32e90f5ce (patch)
tree: 6a6587ef0407bcee4694c3c2af0debd697ed87c6 /tex/context/base
parent: f93975efd76053e907d19114d4ba576ae44da134 (diff)
download: context-b23c2188805ed9f7bdbdbe11eed957a32e90f5ce.tar.gz
12 files changed, 123 insertions, 146 deletions
diff --git a/tex/context/base/char-cmp.lua b/tex/context/base/char-cmp.lua
index be75dc0fd..e522226f8 100644
--- a/tex/context/base/char-cmp.lua
+++ b/tex/context/base/char-cmp.lua
@@ -208,7 +208,6 @@ function characters.uncompose(n) -- n == string|number, returns string
     else
         cdn = characters.data[n]
     end
-    -- return characters.shape(n)
     if cdn then
         local shcode = cdn.shcode
         if not shcode then
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua
index b3ac6fc22..f86eeaf66 100644
--- a/tex/context/base/char-ini.lua
+++ b/tex/context/base/char-ini.lua
@@ -11,10 +11,11 @@ local utf = unicode.utf8
 
 local utfchar, utfbyte, utfvalues = utf.char, utf.byte, string.utfvalues
 local concat, unpack = table.concat, table.unpack
-local next, tonumber, type = next, tonumber, type
+local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset
 local texsprint, texprint = tex.sprint, tex.print
 local format, lower, gsub, match, gmatch = string.format, string.lower, string.gsub, string.match, string.match, string.gmatch
 local texsetlccode, texsetuccode, texsetsfcode, texsetcatcode  = tex.setlccode, tex.setuccode, tex.setsfcode, tex.setcatcode
+local P, R, lpegmatch = lpeg.P, lpeg.R, lpeg.match
 
 local allocate, mark = utilities.storage.allocate, utilities.storage.mark
 
@@ -58,7 +59,36 @@ storage.register("characters/ranges",characters.ranges,"characters.ranges")
 
 local ranges = characters.ranges
 
+--[[ldx--
+<p>This converts a string (if given) into a number.</p>
+--ldx]]--
+
+local pattern = (P("0x") + P("U+")) * ((R("09","AF")^1 * P(-1)) / function(s) return tonumber(s,16) end)
+
+lpeg.patterns.chartonumber = pattern
+
+local function chartonumber(k) -- string ("a", "0x61", "U+61") or number in, number out
+    if type(k) == "string" then return lpegmatch(pattern,k) or utfbyte(k) else return k end -- explicit branch: a string without a code point yields nil, not the string itself
+end
+
+--~ print(chartonumber(97), chartonumber("a"), chartonumber("0x61"), chartonumber("U+61"))
+
+characters.tonumber = chartonumber
+
 setmetatablekey(data, "__index", function(t,k)
+    if type(k) == "string" then
+        k = lpegmatch(pattern,k) or utfbyte(k)
+        if k then
+            local tk = rawget(t,k)
+            if tk then
+                return tk
+            else
+                -- goes to ranges
+            end
+        else
+            return nil
+        end
+    end
     for r=1,#ranges do
         local rr = ranges[r] -- first in range
         if k > rr and k <= data[rr].range then
@@ -392,10 +422,15 @@ use the table. After all, we have this information available anyway.</p>
 
 function characters.makeactive(n,name) -- let ?
     texsprint(ctxcatcodes,format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name))
+ -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
 end
 
-function tex.uprint(n)
-    texsprint(ctxcatcodes,utfchar(n))
+function tex.uprint(c,n)
+    if not n then
+        texsprint(utfchar(c))   -- one argument: c is the code point itself
+    else
+        texsprint(c,utfchar(n)) -- two arguments: c is passed on as first argument of tex.sprint, n is the code point
+    end
+end
 
 if texsetcatcode then
@@ -478,7 +513,7 @@ if texsetcatcode then
 
     end
 
-else -- keep this
+else -- char-obs
 
     local template_a = "\\startextendcatcodetable{%s}\\chardef\\l=11\\chardef\\a=13\\let\\c\\catcode%s\\let\\a\\undefined\\let\\l\\undefined\\let\\c\\undefined\\stopextendcatcodetable"
     local template_b = "\\chardef\\l=11\\chardef\\a=13\\let\\c\\catcode%s\\let\\a\\undefined\\let\\l\\undefined\\let\\c\\undefined"
@@ -595,7 +630,7 @@ if texsetcatcode then
         end
     end
 
-else -- keep this one
+else -- char-obs
 
     function characters.setcodes()
         for code, chr in next, data do
@@ -623,69 +658,16 @@ of the official <l n='api'/>.</p>
 --ldx]]--
 
 --[[ldx--
-<p>This converts a string (if given) into a number.</p>
---ldx]]--
-
-function characters.number(n)
-    if type(n) == "string" then return tonumber(n,16) else return n end
-end
-
---[[ldx--
-<p>Checking for valid characters.</p>
+<p>A couple of convenience methods. Beware, these are slower than directly
+accessing the data table.</p>
 --ldx]]--
 
-function characters.is_valid(s)
-    return s or ""
-end
-
-function characters.checked(s, default)
-    return s or default
-end
-
-characters.valid = characters.is_valid
-
---[[ldx--
-<p></p>
---ldx]]--
--- set a table entry; index is number (can be different from unicodeslot)
+-- we could make them virtual: characters.contextnames[n]
 
-function characters.set(n, c)
-    data[characters.number(n)] = c
-end
-
---[[ldx--
-<p>Get a table entry happens by number. Keep in mind that the unicodeslot
-can be different (not likely).</p>
---ldx]]--
-
-function characters.get(n)
-    return data[characters.number(n)]
-end
-
---[[ldx--
-<p>A couple of convenience methods. Beware, these are not that fast due
-to the checking.</p>
---ldx]]--
-
-function characters.hexindex(n)
-    return format("%04X", characters.valid(data[characters.number(n)].unicodeslot))
-end
-
-function characters.contextname(n)
-    return characters.valid(data[characters.number(n)].contextname)
-end
-
-function characters.adobename(n)
-    return characters.valid(data[characters.number(n)].adobename)
-end
-
-function characters.description(n)
-    return characters.valid(data[characters.number(n)].description)
-end
-
-function characters.category(n)
-    return characters.valid(data[characters.number(n)].category)
-end
+function characters.contextname(n) local d = data[n] return d and d.contextname or "" end -- "" as well when there is no entry for n
+function characters.adobename  (n) local d = data[n] return d and d.adobename   or "" end
+function characters.description(n) local d = data[n] return d and d.description or "" end
+function characters.category   (n) local d = data[n] return d and d.category    or "" end
 
 --[[ldx--
 <p>Requesting lower and uppercase codes:</p>
@@ -719,28 +701,6 @@ function characters.shape(n)
     end
 end
 
---[[ldx--
-<p>Categories play an important role, so here are some checkers.</p>
---ldx]]--
-
-function characters.is_of_category(token,category)
-    if type(token) == "string" then
-        return data[utfbyte(token)].category == category
-    else
-        return data[token].category == category
-    end
-end
-
-function characters.i_is_of_category(i,category) -- by index (number)
-    local cd = data[i]
-    return cd and cd.category == category
-end
-
-function characters.n_is_of_category(n,category) -- by name (string)
-    local cd = data[utfbyte(n)]
-    return cd and cd.category == category
-end
-
 -- xml support (moved)
 
 function characters.remapentity(chr,slot)
@@ -779,13 +739,17 @@ end
 
 utf.string = utf.string or utfstring
 
-characters.lccodes = allocate()  local lccodes = characters.lccodes -- lazy table
-characters.uccodes = allocate()  local uccodes = characters.uccodes -- lazy table
-characters.shcodes = allocate()  local shcodes = characters.shcodes -- lazy table
+characters.categories = allocate()  local categories = characters.categories -- lazy table
 
-setmetatable(lccodes, { __index = function(t,u) if u then local c = data[u] c = c and c.lccode or u t[u] = c return c end end } )
-setmetatable(uccodes, { __index = function(t,u) if u then local c = data[u] c = c and c.uccode or u t[u] = c return c end end } )
-setmetatable(shcodes, { __index = function(t,u) if u then local c = data[u] c = c and c.shcode or u t[u] = c return c end end } )
+setmetatable(categories, { __index = function(t,u) if u then local c = data[u] c = c and c.category or u t[u] = c return c end end } )
+
+characters.lccodes    = allocate()  local lccodes    = characters.lccodes    -- lazy table
+characters.uccodes    = allocate()  local uccodes    = characters.uccodes    -- lazy table
+characters.shcodes    = allocate()  local shcodes    = characters.shcodes    -- lazy table
+
+setmetatable(lccodes,    { __index = function(t,u) if u then local c = data[u] c = c and c.lccode   or u t[u] = c return c end end } )
+setmetatable(uccodes,    { __index = function(t,u) if u then local c = data[u] c = c and c.uccode   or u t[u] = c return c end end } )
+setmetatable(shcodes,    { __index = function(t,u) if u then local c = data[u] c = c and c.shcode   or u t[u] = c return c end end } )
 
 characters.lcchars = allocate()  local lcchars = characters.lcchars -- lazy table
 characters.ucchars = allocate()  local ucchars = characters.ucchars -- lazy table
@@ -856,7 +820,6 @@ function characters.lettered(str)
     return concat(new)
 end
 
-
 -- -- some day we might go this route, but it does not really save that much
 -- -- so not now (we can generate a lot using mtx-unicode that operates on the
 -- -- database)
diff --git a/tex/context/base/cont-new.tex b/tex/context/base/cont-new.tex
index 5744d86a6..b8f5f2dff 100644
--- a/tex/context/base/cont-new.tex
+++ b/tex/context/base/cont-new.tex
@@ -11,7 +11,7 @@
 %C therefore copyrighted by \PRAGMA. See mreadme.pdf for
 %C details.
 
-\newcontextversion{2010.10.19 23:03}
+\newcontextversion{2010.10.20 13:11}
 
 %D This file is loaded at runtime, thereby providing an
 %D excellent place for hacks, patches, extensions and new
diff --git a/tex/context/base/context.tex b/tex/context/base/context.tex
index 00a426a14..77f42b1ab 100644
--- a/tex/context/base/context.tex
+++ b/tex/context/base/context.tex
@@ -20,7 +20,7 @@
 %D your styles an modules.
 
 \edef\contextformat {\jobname}
-\edef\contextversion{2010.10.19 23:03}
+\edef\contextversion{2010.10.20 13:11}
 
 %D For those who want to use this:
 
diff --git a/tex/context/base/font-ini.lua b/tex/context/base/font-ini.lua
index fd4465d62..6082c1d1d 100644
--- a/tex/context/base/font-ini.lua
+++ b/tex/context/base/font-ini.lua
@@ -71,6 +71,10 @@ fonts.triggers = fonts.triggers or {
 fonts.processors = fonts.processors or {
 }
 
+fonts.analyzers = fonts.analyzers or {
+    useunicodemarks = false,
+}
+
 fonts.manipulators = fonts.manipulators or {
 }
 
diff --git a/tex/context/base/font-ota.lua b/tex/context/base/font-ota.lua
index d148eddbf..18b0bf2d8 100644
--- a/tex/context/base/font-ota.lua
+++ b/tex/context/base/font-ota.lua
@@ -42,11 +42,13 @@ local traverse_node_list = node.traverse
 
 local fontdata           = fonts.ids
 local state              = attributes.private('state')
+local categories         = characters and characters.categories or { } -- sorry, only in context
 
 local fontscolors        = fonts.colors
 local fcs                = (fontscolors and fontscolors.set)   or function() end
 local fcr                = (fontscolors and fontscolors.reset) or function() end
 
+
 -- in the future we will use language/script attributes instead of the
 -- font related value, but then we also need dynamic features which is
 -- somewhat slower; and .. we need a chain of them
@@ -161,10 +163,6 @@ local isol_fina_medi_init = {
     [0x077E] = true, [0x077F] = true, [zwj] = true,
 }
 
-local mark = {
-    [0x0650] = true,
-}
-
 local arab_warned = { }
 
 -- todo: gref
@@ -228,6 +226,7 @@ local function finish(first,last)
 end
 
 function analyzers.methods.arab(head,font,attr) -- maybe make a special version with no trace
+    local useunicodemarks = analyzers.useunicodemarks
     local tfmdata = fontdata[font]
     local marks = tfmdata.marks
     local first, last, current, done = nil, nil, head, false
@@ -235,7 +234,7 @@ function analyzers.methods.arab(head,font,attr) -- maybe make a special version
         if current.id == glyph_code and current.subtype<256 and current.font == font and not has_attribute(current,state) then
             done = true
             local char = current.char
-            if marks[char] or mark[char] then
+            if marks[char] or (useunicodemarks and categories[char] == "mn") then
                 set_attribute(current,state,5) -- mark
                 if trace_analyzing then fcs(current,"font:mark") end
             elseif isol[char] then -- can be zwj or zwnj too
diff --git a/tex/context/base/font-tfm.lua b/tex/context/base/font-tfm.lua
index 266bc7406..4d80f07e1 100644
--- a/tex/context/base/font-tfm.lua
+++ b/tex/context/base/font-tfm.lua
@@ -661,6 +661,7 @@ analyzers.initializers = analyzers.initializers or { }
 local state = attributes.private('state')
 
 function analyzers.aux.setstate(head,font)
+    local useunicodemarks  = analyzers.useunicodemarks
     local tfmdata = fontdata[font]
     local characters = tfmdata.characters
     local descriptions = tfmdata.descriptions
@@ -668,9 +669,10 @@ function analyzers.aux.setstate(head,font)
     while current do
         local id = current.id
         if id == glyph_code and current.font == font then
-            local d = descriptions[current.char]
+            local char = current.char
+            local d = descriptions[char]
             if d then
-                if d.class == "mark" then
+                if d.class == "mark" or (useunicodemarks and categories[char] == "mn") then
                     done = true
                     set_attribute(current,state,5) -- mark
                 elseif n == 0 then
diff --git a/tex/context/base/font-vf.lua b/tex/context/base/font-vf.lua
index 89260d2f8..455646a22 100644
--- a/tex/context/base/font-vf.lua
+++ b/tex/context/base/font-vf.lua
@@ -34,6 +34,8 @@ vf.aux          = vf.aux          or { }
 vf.aux.combine  = vf.aux.combine  or { }
 local combine   = vf.aux.combine
 
+local chardata = characters.data
+
 function methods.install(tag, rules)
     vf.combinations[tag] = rules
     variants[tag] = function(specification)
@@ -200,8 +202,8 @@ methods.install(
 -- todo: interface tables in back-ini
 
 variants["demo-1"] = function(specification)
-    local name = specification.name          -- symbolic name
-    local size = specification.size          -- given size
+    local name = specification.name -- symbolic name
+    local size = specification.size -- given size
     local f, id = tfm.readanddefine('lmroman10-regular',size)
     if f and id then
         local capscale, digscale = 0.85, 0.75
@@ -212,15 +214,15 @@ variants["demo-1"] = function(specification)
             { name = 'lmsans10-regular'      , size = size*capscale }, -- forced extra name
             { name = 'lmtypewriter10-regular', size = size*digscale }  -- forced extra name
         }
-        local i_is_of_category = characters.i_is_of_category
         local characters, descriptions = f.characters, f.descriptions
         local vfspecials = backends.tables.vfspecials
         local red, green, blue, black = vfspecials.red, vfspecials.green, vfspecials.blue, vfspecials.black
         for u,v in next, characters do
-            if u and i_is_of_category(u,'lu') then
+            local cd = chardata[u] local category = cd and cd.category -- chardata[u] can be nil for a font slot, so guard before indexing
+            if category == 'lu' then
                 v.width = capscale*v.width
                 v.commands = { red, { 'slot', 2, u }, black }
-            elseif u and i_is_of_category(u,'nd') then
+            elseif category == 'nd' then
                 v.width = digscale*v.width
                 v.commands = { blue, { 'slot', 3, u }, black }
             else
diff --git a/tex/context/base/l-table.lua b/tex/context/base/l-table.lua
index b9753dd26..b661e7aaa 100644
--- a/tex/context/base/l-table.lua
+++ b/tex/context/base/l-table.lua
@@ -255,28 +255,6 @@ end
 table.fastcopy = fastcopy
 table.copy     = copy
 
--- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack)
-
-function table.sub(t,i,j)
-    return { unpack(t,i,j) }
-end
-
-function table.replace(a,b)
-    for k,v in next, b do
-        a[k] = v
-    end
-end
-
--- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice)
-
-function table.is_empty(t) -- obolete, use inline code instead
-    return not t or not next(t)
-end
-
-function table.has_one_entry(t)
-    local n = next(t)
-    return n and not next(t,n)
-end
 
 function table.tohash(t,value)
     local h = { }
@@ -872,3 +850,28 @@ end
 function table.print(...)
     table.tohandle(print,...)
 end
+
+-- -- -- obsolete but we keep them for a while and will comment them later -- -- --
+
+-- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack)
+
+function table.sub(t,i,j)
+    return { unpack(t,i,j) }
+end
+
+-- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice)
+
+function table.is_empty(t)
+    return not t or next(t) == nil -- compare with nil: false is a valid key, so "not next(t)" misreports such a table as empty
+end
+
+function table.has_one_entry(t)
+    local n = next(t)
+    return n ~= nil and next(t,n) == nil -- test against nil (false is a valid key); always yields a boolean
+end
+
+function table.replace(a,b)
+    for k,v in next, b do
+        a[k] = v
+    end
+end
diff --git a/tex/context/base/mult-cld.lua b/tex/context/base/mult-cld.lua
index 2ff0568c1..373300d69 100644
--- a/tex/context/base/mult-cld.lua
+++ b/tex/context/base/mult-cld.lua
@@ -345,6 +345,8 @@ function context.direct(first,...)
     end
 end
 
+-- todo: use flush directly
+
 function context.char(k) -- todo: if catcode == letter or other then just the utf
     if type(k) == "table" then
         for i=1,#k do
@@ -355,6 +357,10 @@ function context.char(k) -- todo: if catcode == letter or other then just the ut
     end
 end
 
+function context.utfchar(k)
+    context(utfchar(k))
+end
+
 function context.chardef(cs,u)
     context(format([[\chardef\%s=%s\relax]],k))
 end
diff --git a/tex/context/base/s-fnt-30.tex b/tex/context/base/s-fnt-30.tex
index 1daf6806e..d6d298011 100644
--- a/tex/context/base/s-fnt-30.tex
+++ b/tex/context/base/s-fnt-30.tex
@@ -19,7 +19,7 @@ function document.show_character_data(n)
     if d then
         local function entry(label,name)
             NC() context(label)
-            NC() context(characters.valid(d[name])
+            NC() context(d[name])
             NC() NR()
         end
         context.starttabulate { "|Tl|Tl|]" }
diff --git a/tex/context/base/s-reg-01.tex b/tex/context/base/s-reg-01.tex
index 5ed86cb4e..d3e456881 100644
--- a/tex/context/base/s-reg-01.tex
+++ b/tex/context/base/s-reg-01.tex
@@ -26,22 +26,21 @@ function regimes.show(regime)
     regime = regimes.synonyms[regime] or regime
     local r = regimes.data[regime]
     if r then
+        local chardata = characters.data
         context.starttabulate { "|rT|T|rT|lT|lT|lT|" }
         for k, v in ipairs(r) do
-            context.NC()
-            context(k)
-            context.NC()
-            context.getvalue(characters.contextname(v))
-            context.NC()
-            context(characters.hexindex(v))
-            context.NC()
-            context(characters.contextname(v))
-            context.NC()
-            context(characters.category(v))
-            context.NC()
-            context(characters.description(v))
-            context.NC()
-            context.NR()
+            local chr = chardata[v]
+            if chr then
+                context.NC() context(k)
+                context.NC() context.getvalue(chr.contextname)
+                context.NC() context("U+%05X",v)
+                context.NC() context(chr.contextname)
+                context.NC() context(chr.category)
+                context.NC() context(chr.description)
+                context.NC() context.NR()
+            else
+                -- can't happen
+            end
         end
         context.stoptabulate()
     else
author	Marius <mariausol@gmail.com>	2010-10-20 14:40:12 +0300
committer	Marius <mariausol@gmail.com>	2010-10-20 14:40:12 +0300
commit	b23c2188805ed9f7bdbdbe11eed957a32e90f5ce (patch)
tree	6a6587ef0407bcee4694c3c2af0debd697ed87c6 /tex/context/base
parent	f93975efd76053e907d19114d4ba576ae44da134 (diff)
download	context-b23c2188805ed9f7bdbdbe11eed957a32e90f5ce.tar.gz