summaryrefslogtreecommitdiff
path: root/tex/context/base
diff options
context:
space:
mode:
authorContext Git Mirror Bot <phg42.2a@gmail.com>2014-10-06 01:15:04 +0200
committerContext Git Mirror Bot <phg42.2a@gmail.com>2014-10-06 01:15:04 +0200
commit0e1fbe2dcb089fd8c8de066587c860a9aae8cb61 (patch)
tree10b838a7687eebe508cc3f20bd76ceb4fded12c9 /tex/context/base
parente891b55d71c2b4139dd25f40cfe344a4899a1ef2 (diff)
downloadcontext-0e1fbe2dcb089fd8c8de066587c860a9aae8cb61.tar.gz
2014-10-06 00:31:00
Diffstat (limited to 'tex/context/base')
-rw-r--r--tex/context/base/cont-new.mkiv2
-rw-r--r--tex/context/base/context-version.pdfbin4381 -> 4381 bytes
-rw-r--r--tex/context/base/context.mkiv2
-rw-r--r--tex/context/base/data-tex.lua8
-rw-r--r--tex/context/base/font-afm.lua19
-rw-r--r--tex/context/base/font-map.lua391
-rw-r--r--tex/context/base/font-mis.lua2
-rw-r--r--tex/context/base/font-otf.lua2
-rw-r--r--tex/context/base/l-lpeg.lua3
-rw-r--r--tex/context/base/l-unicode.lua635
-rw-r--r--tex/context/base/lang-url.mkiv28
-rw-r--r--tex/context/base/lpdf-epa.lua7
-rw-r--r--tex/context/base/lpdf-epd.lua329
-rw-r--r--tex/context/base/lpdf-fld.lua6
-rw-r--r--tex/context/base/lpdf-ini.lua445
-rw-r--r--tex/context/base/regi-ini.lua34
-rw-r--r--tex/context/base/regi-pdfdoc.lua26
-rw-r--r--tex/context/base/status-files.pdfbin24685 -> 24731 bytes
-rw-r--r--tex/context/base/status-lua.pdfbin332174 -> 333371 bytes
-rw-r--r--tex/context/base/strc-bkm.mkiv32
-rw-r--r--tex/context/base/supp-box.lua17
-rw-r--r--tex/context/base/supp-box.mkiv18
22 files changed, 1029 insertions, 977 deletions
diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv
index 51f9ed8a0..b4c6976b4 100644
--- a/tex/context/base/cont-new.mkiv
+++ b/tex/context/base/cont-new.mkiv
@@ -11,7 +11,7 @@
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.
-\newcontextversion{2014.10.03 19:27}
+\newcontextversion{2014.10.06 00:29}
%D This file is loaded at runtime, thereby providing an excellent place for
%D hacks, patches, extensions and new features.
diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf
index 53e291920..9069b051b 100644
--- a/tex/context/base/context-version.pdf
+++ b/tex/context/base/context-version.pdf
Binary files differ
diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv
index 0182e23a2..e76ba90d7 100644
--- a/tex/context/base/context.mkiv
+++ b/tex/context/base/context.mkiv
@@ -28,7 +28,7 @@
%D up and the dependencies are more consistent.
\edef\contextformat {\jobname}
-\edef\contextversion{2014.10.03 19:27}
+\edef\contextversion{2014.10.06 00:29}
\edef\contextkind {beta}
%D For those who want to use this:
diff --git a/tex/context/base/data-tex.lua b/tex/context/base/data-tex.lua
index 04c5ef469..b6b97a0a9 100644
--- a/tex/context/base/data-tex.lua
+++ b/tex/context/base/data-tex.lua
@@ -77,13 +77,13 @@ function helpers.textopener(tag,filename,filehandle,coding)
report_tex("%a opener: %a opened using method %a",tag,filename,coding)
end
if coding == "utf-16-be" then
- lines = utf.utf16_to_utf8_be(lines)
+ lines = utf.utf16_to_utf8_be_t(lines)
elseif coding == "utf-16-le" then
- lines = utf.utf16_to_utf8_le(lines)
+ lines = utf.utf16_to_utf8_le_t(lines)
elseif coding == "utf-32-be" then
- lines = utf.utf32_to_utf8_be(lines)
+ lines = utf.utf32_to_utf8_be_t(lines)
elseif coding == "utf-32-le" then
- lines = utf.utf32_to_utf8_le(lines)
+ lines = utf.utf32_to_utf8_le_t(lines)
else -- utf8 or unknown (could be a mkvi file)
local runner = textfileactions.runner
if runner then
diff --git a/tex/context/base/font-afm.lua b/tex/context/base/font-afm.lua
index e5c9af759..ca5616a1e 100644
--- a/tex/context/base/font-afm.lua
+++ b/tex/context/base/font-afm.lua
@@ -64,6 +64,8 @@ afm.addligatures = true -- best leave this set to true
afm.addtexligatures = true -- best leave this set to true
afm.addkerns = true -- best leave this set to true
+local overloads = fonts.mappings.overloads
+
local applyruntimefixes = fonts.treatments and fonts.treatments.applyfixes
local function setmode(tfmdata,value)
@@ -81,16 +83,6 @@ registerafmfeature {
}
}
-local remappednames = {
- ff = { name = "f_f", unicode = { 0x66, 0x66 } },
- fi = { name = "f_i", unicode = { 0x66, 0x69 } },
- fj = { name = "f_j", unicode = { 0x66, 0x6A } },
- fk = { name = "f_k", unicode = { 0x66, 0x6B } },
- fl = { name = "f_l", unicode = { 0x66, 0x6C } },
- ffi = { name = "f_f_i", unicode = { 0x66, 0x66, 0x69 } },
- ffl = { name = "f_f_l", unicode = { 0x66, 0x66, 0x6C } },
-}
-
--[[ldx--
<p>We start with the basic reader which we give a name similar to the
built in <l n='tfm'/> and <l n='otf'/> reader.</p>
@@ -456,12 +448,13 @@ end
fixnames = function(data)
for k, v in next, data.descriptions do
local n = v.name
- local r = remappednames[n]
+ local r = overloads[n]
if r then
+ local name = r.name
if trace_indexing then
- report_afm("renaming characters %a to %a",n,r.name)
+ report_afm("renaming characters %a to %a",n,name)
end
- v.name = r.name
+ v.name = name
v.unicode = r.unicode
end
end
diff --git a/tex/context/base/font-map.lua b/tex/context/base/font-map.lua
index 309435e0d..890e47d3f 100644
--- a/tex/context/base/font-map.lua
+++ b/tex/context/base/font-map.lua
@@ -6,12 +6,13 @@ if not modules then modules = { } end modules ['font-map'] = {
license = "see context related readme files"
}
-local tonumber = tonumber
+local tonumber, next, type = tonumber, next, type
local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower
local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match
local utfbyte = utf.byte
local floor = math.floor
+local formatters = string.formatters
local trace_loading = false trackers.register("fonts.loading", function(v) trace_loading = v end)
local trace_mapping = false trackers.register("fonts.mapping", function(v) trace_unimapping = v end)
@@ -66,11 +67,14 @@ local function makenameparser(str)
end
end
+local f_single = formatters["%04X"]
+local f_double = formatters["%04X%04X"]
+
local function tounicode16(unicode,name)
if unicode < 0x10000 then
- return format("%04X",unicode)
+ return f_single(unicode)
elseif unicode < 0x1FFFFFFFFF then
- return format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00)
+ return f_double(floor(unicode/1024),unicode%1024+0xDC00)
else
report_fonts("can't convert %a in %a into tounicode",unicode,name)
end
@@ -81,9 +85,9 @@ local function tounicode16sequence(unicodes,name)
for l=1,#unicodes do
local u = unicodes[l]
if u < 0x10000 then
- t[l] = format("%04X",u)
+ t[l] = f_single(u)
elseif unicode < 0x1FFFFFFFFF then
- t[l] = format("%04X%04X",floor(u/1024),u%1024+0xDC00)
+ t[l] = f_double(floor(u/1024),u%1024+0xDC00)
else
report_fonts ("can't convert %a in %a into tounicode",u,name)
return
@@ -98,9 +102,9 @@ local function tounicode(unicode,name)
for l=1,#unicode do
local u = unicode[l]
if u < 0x10000 then
- t[l] = format("%04X",u)
+ t[l] = f_single(u)
elseif u < 0x1FFFFFFFFF then
- t[l] = format("%04X%04X",floor(u/1024),u%1024+0xDC00)
+ t[l] = f_double(floor(u/1024),u%1024+0xDC00)
else
report_fonts ("can't convert %a in %a into tounicode",u,name)
return
@@ -109,9 +113,9 @@ local function tounicode(unicode,name)
return concat(t)
else
if unicode < 0x10000 then
- return format("%04X",unicode)
+ return f_single(unicode)
elseif unicode < 0x1FFFFFFFFF then
- return format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00)
+ return f_double(floor(unicode/1024),unicode%1024+0xDC00)
else
report_fonts("can't convert %a in %a into tounicode",unicode,name)
end
@@ -187,321 +191,35 @@ local namesplitter = Ct(C((1 - ligseparator - varseparator)^1) * (ligseparator *
-- test("such_so_more")
-- test("such_so_more.that")
--- function mappings.addtounicode(data,filename)
--- local resources = data.resources
--- local properties = data.properties
--- local descriptions = data.descriptions
--- local unicodes = resources.unicodes
--- local lookuptypes = resources.lookuptypes
--- if not unicodes then
--- return
--- end
--- -- we need to move this code
--- unicodes['space'] = unicodes['space'] or 32
--- unicodes['hyphen'] = unicodes['hyphen'] or 45
--- unicodes['zwj'] = unicodes['zwj'] or 0x200D
--- unicodes['zwnj'] = unicodes['zwnj'] or 0x200C
--- -- the tounicode mapping is sparse and only needed for alternatives
--- local private = fonts.constructors.privateoffset
--- local unknown = format("%04X",utfbyte("?"))
--- local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context
--- ----- namevector = fonts.encodings.agl.names -- loaded runtime in context
--- local tounicode = { }
--- local originals = { }
--- local missing = { }
--- resources.tounicode = tounicode
--- resources.originals = originals
--- local lumunic, uparser, oparser
--- local cidinfo, cidnames, cidcodes, usedmap
--- -- if false then -- will become an option
--- -- lumunic = loadlumtable(filename)
--- -- lumunic = lumunic and lumunic.tounicode
--- -- end
--- --
--- cidinfo = properties.cidinfo
--- usedmap = cidinfo and fonts.cid.getmap(cidinfo)
--- --
--- if usedmap then
--- oparser = usedmap and makenameparser(cidinfo.ordering)
--- cidnames = usedmap.names
--- cidcodes = usedmap.unicodes
--- end
--- uparser = makenameparser()
--- local ns, nl = 0, 0
--- for unic, glyph in next, descriptions do
--- local index = glyph.index
--- local name = glyph.name
--- if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
--- local unicode = lumunic and lumunic[name] or unicodevector[name]
--- if unicode then
--- originals[index] = unicode
--- tounicode[index] = tounicode16(unicode,name)
--- ns = ns + 1
--- end
--- -- cidmap heuristics, beware, there is no guarantee for a match unless
--- -- the chain resolves
--- if (not unicode) and usedmap then
--- local foundindex = lpegmatch(oparser,name)
--- if foundindex then
--- unicode = cidcodes[foundindex] -- name to number
--- if unicode then
--- originals[index] = unicode
--- tounicode[index] = tounicode16(unicode,name)
--- ns = ns + 1
--- else
--- local reference = cidnames[foundindex] -- number to name
--- if reference then
--- local foundindex = lpegmatch(oparser,reference)
--- if foundindex then
--- unicode = cidcodes[foundindex]
--- if unicode then
--- originals[index] = unicode
--- tounicode[index] = tounicode16(unicode,name)
--- ns = ns + 1
--- end
--- end
--- if not unicode or unicode == "" then
--- local foundcodes, multiple = lpegmatch(uparser,reference)
--- if foundcodes then
--- originals[index] = foundcodes
--- if multiple then
--- tounicode[index] = tounicode16sequence(foundcodes)
--- nl = nl + 1
--- unicode = true
--- else
--- tounicode[index] = tounicode16(foundcodes,name)
--- ns = ns + 1
--- unicode = foundcodes
--- end
--- end
--- end
--- end
--- end
--- end
--- end
--- -- a.whatever or a_b_c.whatever or a_b_c (no numbers) a.b_
--- --
--- -- It is not trivial to find a solution that suits all fonts. We tried several alternatives
--- -- and this one seems to work reasonable also with fonts that use less standardized naming
--- -- schemes. The extra private test is tested by KE and seems to work okay with non-typical
--- -- fonts as well.
--- --
--- -- The next time I look into this, I'll add an extra analysis step to the otf loader (we can
--- -- resolve some tounicodes by looking into the gsub data tables that are bound to glyphs.
--- --
--- if not unicode or unicode == "" then
--- local split = lpegmatch(namesplitter,name)
--- local nsplit = split and #split or 0
--- local t, n = { }, 0
--- unicode = true
--- for l=1,nsplit do
--- local base = split[l]
--- local u = unicodes[base] or unicodevector[base]
--- if not u then
--- break
--- elseif type(u) == "table" then
--- if u[1] >= private then
--- unicode = false
--- break
--- end
--- n = n + 1
--- t[n] = u[1]
--- else
--- if u >= private then
--- unicode = false
--- break
--- end
--- n = n + 1
--- t[n] = u
--- end
--- end
--- if n == 0 then -- done then
--- -- nothing
--- elseif n == 1 then
--- local unicode = t[1]
--- originals[index] = unicode
--- tounicode[index] = tounicode16(unicode,name)
--- else
--- originals[index] = t
--- tounicode[index] = tounicode16sequence(t)
--- end
--- nl = nl + 1
--- end
--- -- last resort (we might need to catch private here as well)
--- if not unicode or unicode == "" then
--- local foundcodes, multiple = lpegmatch(uparser,name)
--- if foundcodes then
--- if multiple then
--- originals[index] = foundcodes
--- tounicode[index] = tounicode16sequence(foundcodes,name)
--- nl = nl + 1
--- unicode = true
--- else
--- originals[index] = foundcodes
--- tounicode[index] = tounicode16(foundcodes,name)
--- ns = ns + 1
--- unicode = foundcodes
--- end
--- end
--- end
--- -- check using substitutes and alternates
--- --
--- if not unicode then
--- missing[name] = true
--- end
--- -- if not unicode then
--- -- originals[index] = 0xFFFD
--- -- tounicode[index] = "FFFD"
--- -- end
--- end
--- end
--- if next(missing) then
--- local guess = { }
--- -- helper
--- local function check(gname,code,unicode)
--- local description = descriptions[code]
--- -- no need to add a self reference
--- local variant = description.name
--- if variant == gname then
--- return
--- end
--- -- the variant already has a unicode (normally that resultrs in a default tounicode to self)
--- local unic = unicodes[variant]
--- if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
--- -- no default mapping and therefore maybe no tounicode yet
--- else
--- return
--- end
--- -- the variant already has a tounicode
--- local index = descriptions[code].index
--- if tounicode[index] then
--- return
--- end
--- -- add to the list
--- local g = guess[variant]
--- if g then
--- g[gname] = unicode
--- else
--- guess[variant] = { [gname] = unicode }
--- end
--- end
--- --
--- for unicode, description in next, descriptions do
--- local slookups = description.slookups
--- if slookups then
--- local gname = description.name
--- for tag, data in next, slookups do
--- local lookuptype = lookuptypes[tag]
--- if lookuptype == "alternate" then
--- for i=1,#data do
--- check(gname,data[i],unicode)
--- end
--- elseif lookuptype == "substitution" then
--- check(gname,data,unicode)
--- end
--- end
--- end
--- local mlookups = description.mlookups
--- if mlookups then
--- local gname = description.name
--- for tag, list in next, mlookups do
--- local lookuptype = lookuptypes[tag]
--- if lookuptype == "alternate" then
--- for i=1,#list do
--- local data = list[i]
--- for i=1,#data do
--- check(gname,data[i],unicode)
--- end
--- end
--- elseif lookuptype == "substitution" then
--- for i=1,#list do
--- check(gname,list[i],unicode)
--- end
--- end
--- end
--- end
--- end
--- -- resolve references
--- local done = true
--- while done do
--- done = false
--- for k, v in next, guess do
--- if type(v) ~= "number" then
--- for kk, vv in next, v do
--- if vv == -1 or vv >= private or (vv >= 0xE000 and vv <= 0xF8FF) or vv == 0xFFFE or vv == 0xFFFF then
--- local uu = guess[kk]
--- if type(uu) == "number" then
--- guess[k] = uu
--- done = true
--- end
--- else
--- guess[k] = vv
--- done = true
--- end
--- end
--- end
--- end
--- end
--- -- generate tounicodes
--- for k, v in next, guess do
--- if type(v) == "number" then
--- guess[k] = tounicode16(v)
--- else
--- local t = nil
--- local l = lower(k)
--- local u = unicodes[l]
--- if not u then
--- -- forget about it
--- elseif u == -1 or u >= private or (u >= 0xE000 and u <= 0xF8FF) or u == 0xFFFE or u == 0xFFFF then
--- local du = descriptions[u]
--- local index = du.index
--- t = tounicode[index]
--- if t then
--- tounicode[index] = v
--- originals[index] = unicode
--- end
--- else
--- -- t = u
--- end
--- if t then
--- guess[k] = t
--- else
--- guess[k] = "FFFD"
--- end
--- end
--- end
--- local orphans = 0
--- local guessed = 0
--- for k, v in next, guess do
--- if v == "FFFD" then
--- orphans = orphans + 1
--- guess[k] = false
--- else
--- guessed = guessed + 1
--- guess[k] = true
--- end
--- end
--- -- resources.nounicode = guess -- only when we test things
--- if trace_loading and orphans > 0 or guessed > 0 then
--- report_fonts("%s glyphs with no related unicode, %s guessed, %s orphans",guessed+orphans,guessed,orphans)
--- end
--- end
--- if trace_mapping then
--- for unic, glyph in table.sortedhash(descriptions) do
--- local name = glyph.name
--- local index = glyph.index
--- local toun = tounicode[index]
--- if toun then
--- report_fonts("internal slot %U, name %a, unicode %U, tounicode %a",index,name,unic,toun)
--- else
--- report_fonts("internal slot %U, name %a, unicode %U",index,name,unic)
--- end
--- end
--- end
--- if trace_loading and (ns > 0 or nl > 0) then
--- report_fonts("%s tounicode entries added, ligatures %s",nl+ns,ns)
--- end
--- end
+-- to be completed .. for fonts that use unicodes for ligatures which
+-- is a actually a bad thing and should be avoided in the first place
+
+local overloads = {
+ IJ = { name = "I_J", unicode = { 0x49, 0x4A }, mess = 0x0132 },
+ ij = { name = "i_j", unicode = { 0x69, 0x6A }, mess = 0x0133 },
+ ff = { name = "f_f", unicode = { 0x66, 0x66 }, mess = 0xFB00 },
+ fi = { name = "f_i", unicode = { 0x66, 0x69 }, mess = 0xFB01 },
+ fl = { name = "f_l", unicode = { 0x66, 0x6C }, mess = 0xFB02 },
+ ffi = { name = "f_f_i", unicode = { 0x66, 0x66, 0x69 }, mess = 0xFB03 },
+ ffl = { name = "f_f_l", unicode = { 0x66, 0x66, 0x6C }, mess = 0xFB04 },
+ fj = { name = "f_j", unicode = { 0x66, 0x6A } },
+ fk = { name = "f_k", unicode = { 0x66, 0x6B } },
+}
+
+require("char-ini")
+
+for k, v in next, overloads do
+ local name = v.name
+ local mess = v.mess
+ if name then
+ overloads[name] = v
+ end
+ if mess then
+ overloads[mess] = v
+ end
+end
+
+mappings.overloads = overloads
function mappings.addtounicode(data,filename)
local resources = data.resources
@@ -513,12 +231,11 @@ function mappings.addtounicode(data,filename)
return
end
-- we need to move this code
- unicodes['space'] = unicodes['space'] or 32
- unicodes['hyphen'] = unicodes['hyphen'] or 45
- unicodes['zwj'] = unicodes['zwj'] or 0x200D
- unicodes['zwnj'] = unicodes['zwnj'] or 0x200C
+ unicodes['space'] = unicodes['space'] or 32
+ unicodes['hyphen'] = unicodes['hyphen'] or 45
+ unicodes['zwj'] = unicodes['zwj'] or 0x200D
+ unicodes['zwnj'] = unicodes['zwnj'] or 0x200C
local private = fonts.constructors.privateoffset
- local unknown = format("%04X",utfbyte("?"))
local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context
----- namevector = fonts.encodings.agl.names -- loaded runtime in context
local missing = { }
@@ -538,7 +255,12 @@ function mappings.addtounicode(data,filename)
for unic, glyph in next, descriptions do
local index = glyph.index
local name = glyph.name
- if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
+ local r = overloads[name]
+ if r then
+ -- get rid of weird ligatures
+ -- glyph.name = r.name
+ glyph.unicode = r.unicode
+ elseif unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
local unicode = lumunic and lumunic[name] or unicodevector[name]
if unicode then
glyph.unicode = unicode
@@ -641,6 +363,11 @@ function mappings.addtounicode(data,filename)
end
end
-- check using substitutes and alternates
+ local r = overloads[unicode]
+ if r then
+ unicode = r.unicode
+ glyph.unicode = unicode
+ end
--
if not unicode then
missing[name] = true
@@ -670,6 +397,10 @@ function mappings.addtounicode(data,filename)
end
-- add to the list
local g = guess[variant]
+ -- local r = overloads[unicode]
+ -- if r then
+ -- unicode = r.unicode
+ -- end
if g then
g[gname] = unicode
else
diff --git a/tex/context/base/font-mis.lua b/tex/context/base/font-mis.lua
index 96d240300..22f4ccc58 100644
--- a/tex/context/base/font-mis.lua
+++ b/tex/context/base/font-mis.lua
@@ -22,7 +22,7 @@ local handlers = fonts.handlers
handlers.otf = handlers.otf or { }
local otf = handlers.otf
-otf.version = otf.version or 2.801
+otf.version = otf.version or 2.802
otf.cache = otf.cache or containers.define("fonts", "otf", otf.version, true)
function otf.loadcached(filename,format,sub)
diff --git a/tex/context/base/font-otf.lua b/tex/context/base/font-otf.lua
index 58a72508a..18b975215 100644
--- a/tex/context/base/font-otf.lua
+++ b/tex/context/base/font-otf.lua
@@ -53,7 +53,7 @@ local otf = fonts.handlers.otf
otf.glists = { "gsub", "gpos" }
-otf.version = 2.801 -- beware: also sync font-mis.lua
+otf.version = 2.802 -- beware: also sync font-mis.lua
otf.cache = containers.define("fonts", "otf", otf.version, true)
local fontdata = fonts.hashes.identifiers
diff --git a/tex/context/base/l-lpeg.lua b/tex/context/base/l-lpeg.lua
index f3fd28b1d..f310bc0fe 100644
--- a/tex/context/base/l-lpeg.lua
+++ b/tex/context/base/l-lpeg.lua
@@ -145,6 +145,9 @@ patterns.utfbom_8 = utfbom_8
patterns.utf_16_be_nl = P("\000\r\000\n") + P("\000\r") + P("\000\n") -- P("\000\r") * (P("\000\n") + P(true)) + P("\000\n")
patterns.utf_16_le_nl = P("\r\000\n\000") + P("\r\000") + P("\n\000") -- P("\r\000") * (P("\n\000") + P(true)) + P("\n\000")
+patterns.utf_32_be_nl = P("\000\000\000\r\000\000\000\n") + P("\000\000\000\r") + P("\000\000\000\n")
+patterns.utf_32_le_nl = P("\r\000\000\000\n\000\000\000") + P("\r\000\000\000") + P("\n\000\000\000")
+
patterns.utf8one = R("\000\127")
patterns.utf8two = R("\194\223") * utf8next
patterns.utf8three = R("\224\239") * utf8next * utf8next
diff --git a/tex/context/base/l-unicode.lua b/tex/context/base/l-unicode.lua
index 85956308a..b3a4c35e6 100644
--- a/tex/context/base/l-unicode.lua
+++ b/tex/context/base/l-unicode.lua
@@ -56,7 +56,6 @@ local p_utfbom = patterns.utfbom
local p_newline = patterns.newline
local p_whitespace = patterns.whitespace
-
if not unicode then
unicode = { utf = utf } -- for a while
@@ -526,7 +525,8 @@ end
-- end
function utf.remapper(mapping,option) -- static also returns a pattern
- if type(mapping) == "table" then
+ local variant = type(mapping)
+ if variant == "table" then
if option == "dynamic" then
local pattern = false
table.setmetatablenewindex(mapping,function(t,k,v) rawset(t,k,v) pattern = false end)
@@ -553,6 +553,19 @@ function utf.remapper(mapping,option) -- static also returns a pattern
end
end, pattern
end
+ elseif variant == "function" then
+ if option == "pattern" then
+ return Cs((p_utf8char/mapping + p_utf8char)^0)
+ else
+ local pattern = Cs((p_utf8char/mapping + p_utf8char)^0)
+ return function(str)
+ if not str or str == "" then
+ return ""
+ else
+ return lpegmatch(pattern,str)
+ end
+ end, pattern
+ end
else
-- is actually an error
return function(str)
@@ -669,297 +682,359 @@ end
local utf16_to_utf8_be, utf16_to_utf8_le
local utf32_to_utf8_be, utf32_to_utf8_le
-local utf_16_be_linesplitter = patterns.utfbom_16_be^-1 * lpeg.tsplitat(patterns.utf_16_be_nl)
-local utf_16_le_linesplitter = patterns.utfbom_16_le^-1 * lpeg.tsplitat(patterns.utf_16_le_nl)
+local utf_16_be_getbom = patterns.utfbom_16_be^-1
+local utf_16_le_getbom = patterns.utfbom_16_le^-1
+local utf_32_be_getbom = patterns.utfbom_32_be^-1
+local utf_32_le_getbom = patterns.utfbom_32_le^-1
+
+local utf_16_be_linesplitter = utf_16_be_getbom * lpeg.tsplitat(patterns.utf_16_be_nl)
+local utf_16_le_linesplitter = utf_16_le_getbom * lpeg.tsplitat(patterns.utf_16_le_nl)
+local utf_32_be_linesplitter = utf_32_be_getbom * lpeg.tsplitat(patterns.utf_32_be_nl)
+local utf_32_le_linesplitter = utf_32_le_getbom * lpeg.tsplitat(patterns.utf_32_le_nl)
+
+-- we have three possibilities: bytepairs (using tables), gmatch (using tables), gsub and
+-- lpeg. Bytepairs are the fastert but as soon as we need to remove bombs and so the gain
+-- is less due to more testing. Also, we seldom have to convert utf16 so we don't care to
+-- much about a few milliseconds more runtime. The lpeg variant is upto 20% slower but
+-- still pretty fast.
+--
+-- for historic resone we keep the bytepairs variants around .. beware they don't grab the
+-- bom like the lpegs do so they're not dropins in the functions that follow
+--
+-- utf16_to_utf8_be = function(s)
+-- if not s then
+-- return nil
+-- elseif s == "" then
+-- return ""
+-- end
+-- local result, r, more = { }, 0, 0
+-- for left, right in bytepairs(s) do
+-- if right then
+-- local now = 256*left + right
+-- if more > 0 then
+-- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+-- more = 0
+-- r = r + 1
+-- result[r] = utfchar(now)
+-- elseif now >= 0xD800 and now <= 0xDBFF then
+-- more = now
+-- else
+-- r = r + 1
+-- result[r] = utfchar(now)
+-- end
+-- end
+-- end
+-- return concat(result)
+-- end
+--
+-- utf16_to_utf8_be_t = function(t)
+-- if not t then
+-- return nil
+-- elseif type(t) == "string" then
+-- t = lpegmatch(utf_16_be_linesplitter,t)
+-- end
+-- local result = { } -- we reuse result
+-- for i=1,#t do
+-- local s = t[i]
+-- if s ~= "" then
+-- local r, more = 0, 0
+-- for left, right in bytepairs(s) do
+-- if right then
+-- local now = 256*left + right
+-- if more > 0 then
+-- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+-- more = 0
+-- r = r + 1
+-- result[r] = utfchar(now)
+-- elseif now >= 0xD800 and now <= 0xDBFF then
+-- more = now
+-- else
+-- r = r + 1
+-- result[r] = utfchar(now)
+-- end
+-- end
+-- end
+-- t[i] = concat(result,"",1,r) -- we reused tmp, hence t
+-- end
+-- end
+-- return t
+-- end
+--
+-- utf16_to_utf8_le = function(s)
+-- if not s then
+-- return nil
+-- elseif s == "" then
+-- return ""
+-- end
+-- local result, r, more = { }, 0, 0
+-- for left, right in bytepairs(s) do
+-- if right then
+-- local now = 256*right + left
+-- if more > 0 then
+-- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+-- more = 0
+-- r = r + 1
+-- result[r] = utfchar(now)
+-- elseif now >= 0xD800 and now <= 0xDBFF then
+-- more = now
+-- else
+-- r = r + 1
+-- result[r] = utfchar(now)
+-- end
+-- end
+-- end
+-- return concat(result)
+-- end
+--
+-- utf16_to_utf8_le_t = function(t)
+-- if not t then
+-- return nil
+-- elseif type(t) == "string" then
+-- t = lpegmatch(utf_16_le_linesplitter,t)
+-- end
+-- local result = { } -- we reuse result
+-- for i=1,#t do
+-- local s = t[i]
+-- if s ~= "" then
+-- local r, more = 0, 0
+-- for left, right in bytepairs(s) do
+-- if right then
+-- local now = 256*right + left
+-- if more > 0 then
+-- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+-- more = 0
+-- r = r + 1
+-- result[r] = utfchar(now)
+-- elseif now >= 0xD800 and now <= 0xDBFF then
+-- more = now
+-- else
+-- r = r + 1
+-- result[r] = utfchar(now)
+-- end
+-- end
+-- end
+-- t[i] = concat(result,"",1,r) -- we reused tmp, hence t
+-- end
+-- end
+-- return t
+-- end
+--
+-- utf32_to_utf8_be_t = function(t)
+-- if not t then
+-- return nil
+-- elseif type(t) == "string" then
+-- t = lpegmatch(utflinesplitter,t)
+-- end
+-- local result = { } -- we reuse result
+-- for i=1,#t do
+-- local r, more = 0, -1
+-- for a,b in bytepairs(t[i]) do
+-- if a and b then
+-- if more < 0 then
+-- more = 256*256*256*a + 256*256*b
+-- else
+-- r = r + 1
+-- result[t] = utfchar(more + 256*a + b)
+-- more = -1
+-- end
+-- else
+-- break
+-- end
+-- end
+-- t[i] = concat(result,"",1,r)
+-- end
+-- return t
+-- end
+--
+-- utf32_to_utf8_le_t = function(t)
+-- if not t then
+-- return nil
+-- elseif type(t) == "string" then
+-- t = lpegmatch(utflinesplitter,t)
+-- end
+-- local result = { } -- we reuse result
+-- for i=1,#t do
+-- local r, more = 0, -1
+-- for a,b in bytepairs(t[i]) do
+-- if a and b then
+-- if more < 0 then
+-- more = 256*b + a
+-- else
+-- r = r + 1
+-- result[t] = utfchar(more + 256*256*256*b + 256*256*a)
+-- more = -1
+-- end
+-- else
+-- break
+-- end
+-- end
+-- t[i] = concat(result,"",1,r)
+-- end
+-- return t
+-- end
--- we have three possibilities:
+local more = 0
+
+local p_utf16_to_utf8_be = C(1) * C(1) /function(left,right)
+ local now = 256*byte(left) + byte(right)
+ if more > 0 then
+ now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+ more = 0
+ return utfchar(now)
+ elseif now >= 0xD800 and now <= 0xDBFF then
+ more = now
+ -- return ""
+ else
+ return utfchar(now)
+ end
+end
+
+local p_utf16_to_utf8_le = C(1) * C(1) /function(right,left)
+ local now = 256*byte(left) + byte(right)
+ if more > 0 then
+ now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+ more = 0
+ return utfchar(now)
+ elseif now >= 0xD800 and now <= 0xDBFF then
+ more = now
+ -- return ""
+ else
+ return utfchar(now)
+ end
+end
+local p_utf32_to_utf8_be = C(1) * C(1) * C(1) * C(1) /function(a,b,c,d)
+ return utfchar(256*256*256*byte(a) + 256*256*byte(b) + 256*byte(c) + byte(d))
+end
--- bytepairs: 0.048
--- gmatch : 0.069
--- lpeg : 0.089 (match time captures)
+local p_utf32_to_utf8_le = C(1) * C(1) * C(1) * C(1) /function(a,b,c,d)
+ return utfchar(256*256*256*byte(d) + 256*256*byte(c) + 256*byte(b) + byte(a))
+end
-if bytepairs then
+p_utf16_to_utf8_be = P(true) / function() more = 0 end * utf_16_be_getbom * Cs(p_utf16_to_utf8_be^0)
+p_utf16_to_utf8_le = P(true) / function() more = 0 end * utf_16_le_getbom * Cs(p_utf16_to_utf8_le^0)
+p_utf32_to_utf8_be = P(true) / function() more = 0 end * utf_32_be_getbom * Cs(p_utf32_to_utf8_be^0)
+p_utf32_to_utf8_le = P(true) / function() more = 0 end * utf_32_le_getbom * Cs(p_utf32_to_utf8_le^0)
- -- with a little bit more code we could include the linesplitter
+patterns.utf16_to_utf8_be = p_utf16_to_utf8_be
+patterns.utf16_to_utf8_le = p_utf16_to_utf8_le
+patterns.utf32_to_utf8_be = p_utf32_to_utf8_be
+patterns.utf32_to_utf8_le = p_utf32_to_utf8_le
- utf16_to_utf8_be = function(t)
- if not t then
- return nil
- elseif type(t) == "string" then
- t = lpegmatch(utf_16_be_linesplitter,t)
- end
- local result = { } -- we reuse result
- for i=1,#t do
- local r, more = 0, 0
- for left, right in bytepairs(t[i]) do
- if right then
- local now = 256*left + right
- if more > 0 then
- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
- more = 0
- r = r + 1
- result[r] = utfchar(now)
- elseif now >= 0xD800 and now <= 0xDBFF then
- more = now
- else
- r = r + 1
- result[r] = utfchar(now)
- end
- end
- end
- t[i] = concat(result,"",1,r) -- we reused tmp, hence t
- end
- return t
+utf16_to_utf8_be = function(s)
+ if s and s ~= "" then
+ return lpegmatch(p_utf16_to_utf8_be,s)
+ else
+ return s
end
+end
- utf16_to_utf8_le = function(t)
- if not t then
- return nil
- elseif type(t) == "string" then
- t = lpegmatch(utf_16_le_linesplitter,t)
- end
- local result = { } -- we reuse result
- for i=1,#t do
- local r, more = 0, 0
- for left, right in bytepairs(t[i]) do
- if right then
- local now = 256*right + left
- if more > 0 then
- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
- more = 0
- r = r + 1
- result[r] = utfchar(now)
- elseif now >= 0xD800 and now <= 0xDBFF then
- more = now
- else
- r = r + 1
- result[r] = utfchar(now)
- end
- end
- end
- t[i] = concat(result,"",1,r) -- we reused tmp, hence t
+utf16_to_utf8_be_t = function(t)
+ if not t then
+ return nil
+ elseif type(t) == "string" then
+ t = lpegmatch(utf_16_be_linesplitter,t)
+ end
+ for i=1,#t do
+ local s = t[i]
+ if s ~= "" then
+ t[i] = lpegmatch(p_utf16_to_utf8_be,s)
end
- return t
end
+ return t
+end
- utf32_to_utf8_be = function(t)
- if not t then
- return nil
- elseif type(t) == "string" then
- t = lpegmatch(utflinesplitter,t)
- end
- local result = { } -- we reuse result
- for i=1,#t do
- local r, more = 0, -1
- for a,b in bytepairs(t[i]) do
- if a and b then
- if more < 0 then
- more = 256*256*256*a + 256*256*b
- else
- r = r + 1
- result[t] = utfchar(more + 256*a + b)
- more = -1
- end
- else
- break
- end
- end
- t[i] = concat(result,"",1,r)
- end
- return t
+utf16_to_utf8_le = function(s)
+ if s and s ~= "" then
+ return lpegmatch(p_utf16_to_utf8_le,s)
+ else
+ return s
end
+end
- utf32_to_utf8_le = function(t)
- if not t then
- return nil
- elseif type(t) == "string" then
- t = lpegmatch(utflinesplitter,t)
- end
- local result = { } -- we reuse result
- for i=1,#t do
- local r, more = 0, -1
- for a,b in bytepairs(t[i]) do
- if a and b then
- if more < 0 then
- more = 256*b + a
- else
- r = r + 1
- result[t] = utfchar(more + 256*256*256*b + 256*256*a)
- more = -1
- end
- else
- break
- end
- end
- t[i] = concat(result,"",1,r)
+utf16_to_utf8_le_t = function(t)
+ if not t then
+ return nil
+ elseif type(t) == "string" then
+ t = lpegmatch(utf_16_le_linesplitter,t)
+ end
+ for i=1,#t do
+ local s = t[i]
+ if s ~= "" then
+ t[i] = lpegmatch(p_utf16_to_utf8_le,s)
end
- return t
end
+ return t
+end
-else
-
- utf16_to_utf8_be = function(t)
- if not t then
- return nil
- elseif type(t) == "string" then
- t = lpegmatch(utf_16_be_linesplitter,t)
- end
- local result = { } -- we reuse result
- for i=1,#t do
- local r, more = 0, 0
- for left, right in gmatch(t[i],"(.)(.)") do
- if left == "\000" then -- experiment
- r = r + 1
- result[r] = utfchar(byte(right))
- elseif right then
- local now = 256*byte(left) + byte(right)
- if more > 0 then
- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
- more = 0
- r = r + 1
- result[r] = utfchar(now)
- elseif now >= 0xD800 and now <= 0xDBFF then
- more = now
- else
- r = r + 1
- result[r] = utfchar(now)
- end
- end
- end
- t[i] = concat(result,"",1,r) -- we reused tmp, hence t
- end
- return t
+utf32_to_utf8_be = function(s)
+ if s and s ~= "" then
+ return lpegmatch(p_utf32_to_utf8_be,s)
+ else
+ return s
end
+end
- utf16_to_utf8_le = function(t)
- if not t then
- return nil
- elseif type(t) == "string" then
- t = lpegmatch(utf_16_le_linesplitter,t)
- end
- local result = { } -- we reuse result
- for i=1,#t do
- local r, more = 0, 0
- for left, right in gmatch(t[i],"(.)(.)") do
- if right == "\000" then
- r = r + 1
- result[r] = utfchar(byte(left))
- elseif right then
- local now = 256*byte(right) + byte(left)
- if more > 0 then
- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
- more = 0
- r = r + 1
- result[r] = utfchar(now)
- elseif now >= 0xD800 and now <= 0xDBFF then
- more = now
- else
- r = r + 1
- result[r] = utfchar(now)
- end
- end
- end
- t[i] = concat(result,"",1,r) -- we reused tmp, hence t
+utf32_to_utf8_be_t = function(t)
+ if not t then
+ return nil
+ elseif type(t) == "string" then
+ t = lpegmatch(utf_32_be_linesplitter,t)
+ end
+ for i=1,#t do
+ local s = t[i]
+ if s ~= "" then
+ t[i] = lpegmatch(p_utf32_to_utf8_be,s)
end
- return t
end
+ return t
+end
- utf32_to_utf8_le = function() return { } end -- never used anyway
- utf32_to_utf8_be = function() return { } end -- never used anyway
-
- -- the next one is slighty slower
-
- -- local result, lines, r, more = { }, { }, 0, 0
- --
- -- local simple = Cmt(
- -- C(1) * C(1), function(str,p,left,right)
- -- local now = 256*byte(left) + byte(right)
- -- if more > 0 then
- -- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
- -- more = 0
- -- r = r + 1
- -- result[r] = utfchar(now)
- -- elseif now >= 0xD800 and now <= 0xDBFF then
- -- more = now
- -- else
- -- r = r + 1
- -- result[r] = utfchar(now)
- -- end
- -- return p
- -- end
- -- )
- --
- -- local complex = Cmt(
- -- C(1) * C(1), function(str,p,left,right)
- -- local now = 256*byte(left) + byte(right)
- -- if more > 0 then
- -- now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
- -- more = 0
- -- r = r + 1
- -- result[r] = utfchar(now)
- -- elseif now >= 0xD800 and now <= 0xDBFF then
- -- more = now
- -- else
- -- r = r + 1
- -- result[r] = utfchar(now)
- -- end
- -- return p
- -- end
- -- )
- --
- -- local lineend = Cmt (
- -- patterns.utf_16_be_nl, function(str,p)
- -- lines[#lines+1] = concat(result,"",1,r)
- -- r, more = 0, 0
- -- return p
- -- end
- -- )
- --
- -- local be_1 = patterns.utfbom_16_be^-1 * (simple + complex)^0
- -- local be_2 = patterns.utfbom_16_be^-1 * (lineend + simple + complex)^0
- --
- -- utf16_to_utf8_be = function(t)
- -- if type(t) == "string" then
- -- local s = t
- -- lines, r, more = { }, 0, 0
- -- lpegmatch(be_2,s)
- -- if r > 0 then
- -- lines[#lines+1] = concat(result,"",1,r)
- -- end
- -- result = { }
- -- return lines
- -- else
- -- for i=1,#t do
- -- r, more = 0, 0
- -- lpegmatch(be_1,t[i])
- -- t[i] = concat(result,"",1,r)
- -- end
- -- result = { }
- -- return t
- -- end
- -- end
+utf32_to_utf8_le = function(s)
+ if s and s ~= "" then
+ return lpegmatch(p_utf32_to_utf8_le,s)
+ else
+ return s
+ end
+end
+utf32_to_utf8_le_t = function(t)
+ if not t then
+ return nil
+ elseif type(t) == "string" then
+ t = lpegmatch(utf_32_le_linesplitter,t)
+ end
+ for i=1,#t do
+ local s = t[i]
+ if s ~= "" then
+ t[i] = lpegmatch(p_utf32_to_utf8_le,s)
+ end
+ end
+ return t
end
-utf.utf16_to_utf8_le = utf16_to_utf8_le
-utf.utf16_to_utf8_be = utf16_to_utf8_be
-utf.utf32_to_utf8_le = utf32_to_utf8_le
-utf.utf32_to_utf8_be = utf32_to_utf8_be
+utf.utf16_to_utf8_le_t = utf16_to_utf8_le_t
+utf.utf16_to_utf8_be_t = utf16_to_utf8_be_t
+utf.utf32_to_utf8_le_t = utf32_to_utf8_le_t
+utf.utf32_to_utf8_be_t = utf32_to_utf8_be_t
-function utf.utf8_to_utf8(t)
+utf.utf16_to_utf8_le = utf16_to_utf8_le
+utf.utf16_to_utf8_be = utf16_to_utf8_be
+utf.utf32_to_utf8_le = utf32_to_utf8_le
+utf.utf32_to_utf8_be = utf32_to_utf8_be
+
+function utf.utf8_to_utf8_t(t)
return type(t) == "string" and lpegmatch(utflinesplitter,t) or t
end
-function utf.utf16_to_utf8(t,endian)
- return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t
+function utf.utf16_to_utf8_t(t,endian)
+ return endian and utf16_to_utf8_be_t(t) or utf16_to_utf8_le_t(t) or t
end
-function utf.utf32_to_utf8(t,endian)
- return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t
+function utf.utf32_to_utf8_t(t,endian)
+ return endian and utf32_to_utf8_be_t(t) or utf32_to_utf8_le_t(t) or t
end
-local function little(c)
- local b = byte(c)
+local function little(b)
if b < 0x10000 then
return char(b%256,b/256)
else
@@ -969,8 +1044,7 @@ local function little(c)
end
end
-local function big(c)
- local b = byte(c)
+local function big(b)
if b < 0x10000 then
return char(b/256,b%256)
else
@@ -980,18 +1054,10 @@ local function big(c)
end
end
--- function utf.utf8_to_utf16(str,littleendian)
--- if littleendian then
--- return char(255,254) .. utfgsub(str,".",little)
--- else
--- return char(254,255) .. utfgsub(str,".",big)
--- end
--- end
-
-local l_remap = utf.remapper(little,"pattern")
-local b_remap = utf.remapper(big,"pattern")
+local l_remap = Cs((p_utf8byte/little+P(1)/"")^0)
+local b_remap = Cs((p_utf8byte/big +P(1)/"")^0)
-function utf.utf8_to_utf16_be(str,nobom)
+local function utf8_to_utf16_be(str,nobom)
if nobom then
return lpegmatch(b_remap,str)
else
@@ -999,7 +1065,7 @@ function utf.utf8_to_utf16_be(str,nobom)
end
end
-function utf.utf8_to_utf16_le(str,nobom)
+local function utf8_to_utf16_le(str,nobom)
if nobom then
return lpegmatch(l_remap,str)
else
@@ -1007,11 +1073,14 @@ function utf.utf8_to_utf16_le(str,nobom)
end
end
+utf.utf8_to_utf16_be = utf8_to_utf16_be
+utf.utf8_to_utf16_le = utf8_to_utf16_le
+
function utf.utf8_to_utf16(str,littleendian,nobom)
if littleendian then
- return utf.utf8_to_utf16_le(str,nobom)
+ return utf8_to_utf16_le(str,nobom)
else
- return utf.utf8_to_utf16_be(str,nobom)
+ return utf8_to_utf16_be(str,nobom)
end
end
@@ -1042,16 +1111,16 @@ function utf.xstring(s)
end
function utf.toeight(str)
- if not str then
+ if not str or str == "" then
return nil
end
local utftype = lpegmatch(p_utfstricttype,str)
if utftype == "utf-8" then
- return sub(str,4)
- elseif utftype == "utf-16-le" then
- return utf16_to_utf8_le(str)
+ return sub(str,4) -- remove the bom
elseif utftype == "utf-16-be" then
- return utf16_to_utf8_ne(str)
+ return utf16_to_utf8_be(str) -- bom gets removed
+ elseif utftype == "utf-16-le" then
+ return utf16_to_utf8_le(str) -- bom gets removed
else
return str
end
diff --git a/tex/context/base/lang-url.mkiv b/tex/context/base/lang-url.mkiv
index 8990dccd8..fd3bd3b0d 100644
--- a/tex/context/base/lang-url.mkiv
+++ b/tex/context/base/lang-url.mkiv
@@ -138,3 +138,31 @@
% \dorecurse{100}{\test{a} \test{ab} \test{abc} \test{abcd} \test{abcde} \test{abcdef}}
\protect \endinput
+
+% \setuppapersize[A7]
+%
+% \unexpanded\def\WhateverA#1%
+% {\dontleavehmode
+% \begingroup
+% \prehyphenchar"B7\relax
+% \setbox\scratchbox\hbox{\tttf#1}%
+% \prehyphenchar`-\relax
+% \unhbox\scratchbox
+% \endgroup}
+%
+% \unexpanded\def\WhateverB#1%
+% {\dontleavehmode
+% \begingroup
+% \tttf
+% \prehyphenchar\minusone
+% % \localrightbox{\llap{_}}%
+% \localrightbox{\llap{\smash{\lower1.5ex\hbox{\char"2192}}}}%
+% \setbox\scratchbox\hbox{#1}%
+% \prehyphenchar`-\relax
+% \unhbox\scratchbox
+% \endgroup}
+%
+% \begingroup \hsize1cm
+% \WhateverA{thisisaboringandverylongcommand}\par
+% \WhateverB{thisisaboringandverylongcommand}\par
+% \endgroup
diff --git a/tex/context/base/lpdf-epa.lua b/tex/context/base/lpdf-epa.lua
index 5f6969f45..8ca568b76 100644
--- a/tex/context/base/lpdf-epa.lua
+++ b/tex/context/base/lpdf-epa.lua
@@ -253,6 +253,10 @@ end
-- new: for taco
+-- Beware, bookmarks can be in pdfdoc encoding or in unicode. However, in mkiv we
+-- write out the strings in unicode (hex). When we read them in, we check for a bom
+-- and convert to utf.
+
function codeinjections.getbookmarks(filename)
-- The first version built a nested tree and flattened that afterwards ... but I decided
@@ -325,7 +329,8 @@ function codeinjections.getbookmarks(filename)
local function traverse(current,depth)
while current do
- local title = current.Title
+ -- local title = current.Title
+ local title = current("Title") -- can be pdfdoc or unicode
if title then
local entry = {
level = depth,
diff --git a/tex/context/base/lpdf-epd.lua b/tex/context/base/lpdf-epd.lua
index 17007cdd1..14432d88b 100644
--- a/tex/context/base/lpdf-epd.lua
+++ b/tex/context/base/lpdf-epd.lua
@@ -27,30 +27,19 @@ if not modules then modules = { } end modules ['lpdf-epd'] = {
-- there was a long standing gc issue the on long runs with including many pages could
-- crash the analyzer.
--
--- - we cannot access all destinations in one run.
--- - v:getTypeName(), versus types[v:getType()], the last variant is about twice as fast
---
--- A potential speedup is to use local function instead of colon accessors. This will be done
--- in due time. Normally this code is not really speed sensitive but one never knows.
-
--- __newindex = function(t,k,v)
--- local tk = rawget(t,k)
--- if not tk then
--- local o = epdf.Object()
--- o:initString(v)
--- d:add(k,o)
--- end
--- rawset(t,k,v)
--- end,
+-- Normally a value is fetched by key, as in foo.Title but as it can be in pdfdoc encoding
+-- a safer bet is foo("Title") which will return a decoded string (or the original if it
+-- already was unicode).
local setmetatable, rawset, rawget, type = setmetatable, rawset, rawget, type
local tostring, tonumber = tostring, tonumber
-local lower, match, char, utfchar = string.lower, string.match, string.char, utf.char
+local lower, match, char, byte, find = string.lower, string.match, string.char, string.byte, string.find
+local abs = math.abs
local concat = table.concat
-local toutf = string.toutf
+local toutf, toeight, utfchar = string.toutf, utf.toeight, utf.char
local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
-local P, C, S, R, Ct, Cc, V = lpeg.P, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cc, lpeg.V
+local P, C, S, R, Ct, Cc, V, Carg, Cs = lpeg.P, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cc, lpeg.V, lpeg.Carg, lpeg.Cs
local epdf = epdf
lpdf = lpdf or { }
@@ -159,7 +148,20 @@ local checked_access
-- dictionaries (can be optimized: ... resolve and redefine when all locals set)
-local function prepare(document,d,t,n,k,mt)
+local frompdfdoc = lpdf.frompdfdoc
+
+local function get_flagged(t,f,k)
+ local fk = f[k]
+ if not fk then
+ return t[k]
+ elseif fk == "rawtext" then
+ return frompdfdoc(t[k])
+ else -- no other flags yet
+ return t[k]
+ end
+end
+
+local function prepare(document,d,t,n,k,mt,flags)
for i=1,n do
local v = dictGetVal(d,i)
if v then
@@ -174,17 +176,19 @@ local function prepare(document,d,t,n,k,mt)
local objnum = getRefNum(r)
local cached = document.__cache__[objnum]
if not cached then
- cached = checked_access[kind](v,document,objnum)
+ cached = checked_access[kind](v,document,objnum,mt)
if c then
document.__cache__[objnum] = cached
document.__xrefs__[cached] = objnum
end
end
t[key] = cached
- -- rawset(t,key,cached)
else
- t[key] = checked_access[kind](v,document)
- -- rawset(t,key,checked_access[kind](v,document))
+ local v, flag = checked_access[kind](v,document)
+ t[key] = v
+ if flag then
+ flags[key] = flag -- flags
+ end
end
else
report_epdf("warning: nil value for key %a in dictionary",key)
@@ -194,18 +198,26 @@ local function prepare(document,d,t,n,k,mt)
fatal_error("error: invalid value at index %a in dictionary of %a",i,document.filename)
end
end
- setmetatable(t,mt)
+ if mt then
+ setmetatable(t,mt)
+ else
+ getmetatable(t).__index = nil
+ end
return t[k]
end
-local function some_dictionary(d,document,r,mt)
+local function some_dictionary(d,document)
local n = d and dictGetLength(d) or 0
if n > 0 then
local t = { }
+ local f = { }
setmetatable(t, {
__index = function(t,k)
- return prepare(document,d,t,n,k,mt)
- end
+ return prepare(document,d,t,n,k,_,_,f)
+ end,
+ __call = function(t,k)
+ return get_flagged(t,f,k)
+ end,
} )
return t
end
@@ -216,9 +228,13 @@ local function get_dictionary(object,document,r,mt)
local n = d and dictGetLength(d) or 0
if n > 0 then
local t = { }
+ local f = { }
setmetatable(t, {
__index = function(t,k)
- return prepare(document,d,t,n,k,mt)
+ return prepare(document,d,t,n,k,mt,f)
+ end,
+ __call = function(t,k)
+ return get_flagged(t,f,k)
end,
} )
return t
@@ -259,7 +275,7 @@ local function prepare(document,a,t,n,k)
return t[k]
end
-local function some_array(a,document,r)
+local function some_array(a,document)
local n = a and arrayGetLength(a) or 0
if n > 0 then
local t = { n = n }
@@ -272,7 +288,7 @@ local function some_array(a,document,r)
end
end
-local function get_array(object,document,r)
+local function get_array(object,document)
local a = getArray(object)
local n = a and arrayGetLength(a) or 0
if n > 0 then
@@ -303,17 +319,45 @@ local function streamaccess(s,_,what)
end
end
-local function get_stream(d,document,r)
+local function get_stream(d,document)
if d then
streamReset(d)
- local s = some_dictionary(streamGetDict(d),document,r)
+ local s = some_dictionary(streamGetDict(d),document)
getmetatable(s).__call = function(...) return streamaccess(d,...) end
return s
end
end
+-- We need to convert the string from utf16 although there is no way to
+-- check if we have a regular string starting with a bom. So, we have
+-- na dilemma here: a pdf doc encoded string can be invalid utf.
+
+-- <hex encoded> : implicit 0 appended if odd
+-- (byte encoded) : \( \) \\ escaped
+--
+-- <FE><FF> : utf16be
+--
+-- \r \r \t \b \f \( \) \\ \NNN and \<newline> : append next line
+--
+-- the getString function gives back bytes so we don't need to worry about
+-- the hex aspect.
+
+local pattern = lpeg.patterns.utfbom_16_be * lpeg.patterns.utf16_to_utf8_be
+
local function get_string(v)
- return toutf(getString(v))
+ -- the toutf function only converts a utf16 string and leves the original
+ -- untouched otherwise; one might want to apply lpdf.frompdfdoc to a
+ -- non-unicode string
+ local s = getString(v)
+ if not s or s == "" then
+ return ""
+ end
+ local r = lpegmatch(pattern,s)
+ if r then
+ return r
+ else
+ return s, "rawtext"
+ end
end
local function get_null()
@@ -340,7 +384,7 @@ end)
checked_access[typenumbers.boolean] = getBool
checked_access[typenumbers.integer] = getNum
checked_access[typenumbers.real] = getReal
-checked_access[typenumbers.string] = get_string
+checked_access[typenumbers.string] = get_string -- getString
checked_access[typenumbers.name] = getName
checked_access[typenumbers.null] = get_null
checked_access[typenumbers.array] = get_array -- d,document,r
@@ -551,10 +595,10 @@ end
lpdf.epdf.expand = expand
lpdf.epdf.expanded = expanded
--- experiment .. will be finished when there is a real need
+-- we could resolve the text stream in one pass if we directly handle the
+-- font but why should we complicate things
local hexdigit = R("09","AF")
-local hexword = hexdigit*hexdigit*hexdigit*hexdigit / function(s) return tonumber(s,16) end
local numchar = ( P("\\") * ( (R("09")^3/tonumber) + C(1) ) ) + C(1)
local number = lpegpatterns.number / tonumber
local spaces = lpegpatterns.whitespace^1
@@ -563,10 +607,10 @@ local operator = C((R("AZ","az")+P("'")+P('"'))^1)
local grammar = P { "start",
start = (keyword + number + V("dictionary") + V("unicode") + V("string") + V("unicode")+ V("array") + spaces)^1,
- array = P("[") * Ct(V("start")^1) * P("]"),
- dictionary = P("<<") * Ct(V("start")^1) * P(">>"),
- unicode = P("<") * Ct(hexword^1) * P(">"),
- string = P("(") * Ct((V("string")+numchar)^1) * P(")"), -- untested
+ array = P("[") * Ct(V("start")^1) * P("]"),
+ dictionary = P("<<") * Ct(V("start")^1) * P(">>"),
+ unicode = P("<") * Ct(Cc("hex") * C((1-P(">"))^1)) * P(">"),
+ string = P("(") * Ct(Cc("dec") * C((V("string")+numchar)^1)) * P(")"), -- untested
}
local operation = Ct(grammar^1 * operator)
@@ -574,26 +618,37 @@ local parser = Ct((operation + P(1))^1)
-- beginbfrange : <start> <stop> <firstcode>
-- <start> <stop> [ <firstsequence> <firstsequence> <firstsequence> ]
--- beginbfchar : <code> <newcode>
+-- beginbfchar : <code> <newcodes>
+
+local fromsixteen = lpdf.fromsixteen -- maybe inline the lpeg ... but not worth it
+
+local function f_bfchar(t,a,b)
+ t[tonumber(a,16)] = fromsixteen(b)
+end
--- todo: utf16 -> 8
--- we could make range more efficient but it's seldom seen anyway
+local function f_bfrange_1(t,a,b,c)
+ print("todo 1",a,b,c)
+ -- c is string
+ -- todo t[tonumber(a,16)] = fromsixteen(b)
+end
+
+local function f_bfrange_2(t,a,b,c)
+ print("todo 2",a,b,c)
+ -- c is table
+ -- todo t[tonumber(a,16)] = fromsixteen(b)
+end
local optionals = spaces^0
-local whatever = optionals * P("<") * hexword * P(">")
-local hexstring = optionals * P("<") * C(hexdigit^1) * P(">")
-local bfchar = Cc(1) * whatever * whatever
-local bfrange = Cc(2) * whatever * whatever * whatever
- + Cc(3) * whatever * whatever * optionals * P("[") * hexstring^1 * optionals * P("]")
-local fromunicode = Ct ( (
- P("beginbfchar" ) * Ct(bfchar )^1 * optionals * P("endbfchar" ) +
- P("beginbfrange") * Ct(bfrange)^1 * optionals * P("endbfrange") +
+local hexstring = optionals * P("<") * C((1-P(">"))^1) * P(">")
+local bfchar = Carg(1) * hexstring * hexstring / f_bfchar
+local bfrange = Carg(1) * hexstring * hexstring * hexstring / f_bfrange_1
+ + Carg(1) * hexstring * hexstring * optionals * P("[") * Ct(hexstring^1) * optionals * P("]") / f_bfrange_2
+local fromunicode = (
+ P("beginbfchar" ) * bfchar ^1 * optionals * P("endbfchar" ) +
+ P("beginbfrange") * bfrange^1 * optionals * P("endbfrange") +
spaces +
P(1)
-)^1 )
-
-local utf16_to_utf8_be = utf.utf16_to_utf8_be
-local utfchar = utfchar
+)^1 * Carg(1)
local function analyzefonts(document,resources) -- unfinished
local fonts = document.__fonts__
@@ -606,37 +661,12 @@ local function analyzefonts(document,resources) -- unfinished
-- -application for it
local tounicode = data.ToUnicode()
if tounicode then
- tounicode = lpegmatch(fromunicode,tounicode)
- end
- if type(tounicode) == "table" then
- local t = { }
- for i=1,#tounicode do
- local u = tounicode[i]
- local w = u[1]
- if w == 1 then
- t[u[2]] = utfchar(u[3])
- elseif w == 2 then
- local m = u[4]
- for i=u[2],u[3] do
- t[i] = utfchar(m)
- m = m + 1
- end
- elseif w == 3 then
- local m = 4
- for i=u[2],u[3] do
- t[i] = utf16_to_utf8_be(u[m])
- m = m + 1
- end
- end
- end
- fonts[id] = {
- tounicode = t
- }
- else
- fonts[id] = {
- tounicode = { }
- }
+ tounicode = lpegmatch(fromunicode,tounicode,1,{})
end
+ fonts[id] = {
+ tounicode = type(tounicode) == "table" and tounicode or { }
+ }
+ table.setmetatableindex(fonts[id],"self")
end
end
end
@@ -644,6 +674,31 @@ local function analyzefonts(document,resources) -- unfinished
return fonts
end
+local more = 0
+local unic = nil -- cheaper than passing each time as Carg(1)
+
+local p_hex_to_utf = C(4) / function(s) -- needs checking !
+ local now = tonumber(s,16)
+ if more > 0 then
+ now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+ more = 0
+ return unic[now] or utfchar(now)
+ elseif now >= 0xD800 and now <= 0xDBFF then
+ more = now
+ -- return ""
+ else
+ return unic[now] or utfchar(now)
+ end
+end
+
+local p_dec_to_utf = C(1) / function(s) -- needs checking !
+ local now = byte(s)
+ return unic[now] or utfchar(now)
+end
+
+local p_hex_to_utf = P(true) / function() more = 0 end * Cs(p_hex_to_utf^1)
+local p_dec_to_utf = P(true) / function() more = 0 end * Cs(p_dec_to_utf^1)
+
function lpdf.epdf.getpagecontent(document,pagenumber)
local page = document.pages[pagenumber]
@@ -657,7 +712,7 @@ function lpdf.epdf.getpagecontent(document,pagenumber)
local content = page.Contents() or ""
local list = lpegmatch(parser,content)
local font = nil
- local unic = nil
+ -- local unic = nil
for i=1,#list do
local entry = list[i]
@@ -671,55 +726,85 @@ function lpdf.epdf.getpagecontent(document,pagenumber)
for i=1,#list do
local li = list[i]
if type(li) == "table" then
- for i=1,#li do
- local c = li[i]
- local u = unic[c]
- li[i] = u or utfchar(c)
+ if li[1] == "hex" then
+ list[i] = lpegmatch(p_hex_to_utf,li[2])
+ else
+ list[i] = lpegmatch(p_dec_to_utf,li[2])
end
- list[i] = concat(li)
+ else
+ -- kern
end
end
elseif operator == "Tj" or operator == "'" or operator == '"' then -- { string, Tj } { string, ' } { n, m, string, " }
- local li = entry[size-1]
- for i=1,#li do
- local c = li[i]
- local u = unic[c]
- li[i] = utfchar(u or c)
+ local list = entry[size-1]
+ if list[1] == "hex" then
+ list[2] = lpegmatch(p_hex_to_utf,li[2],1,unic)
+ else
+ list[2] = lpegmatch(p_dec_to_utf,li[2],1,unic)
end
- entry[1] = concat(li)
end
end
- -- for i=1,#list do
- -- local entry = list[i]
- -- local size = #entry
- -- local operator = entry[size]
- -- if operator == "TJ" then -- { array, TJ }
- -- local list = entry[1]
- -- for i=1,#list do
- -- local li = list[i]
- -- if type(li) == "string" then
- -- --
- -- elseif li < -50 then
- -- list[i] = " "
- -- else
- -- list[i] = ""
- -- end
- -- end
- -- entry[1] = concat(list)
- -- elseif operator == "Tf" then
- -- -- already concat
- -- elseif operator == "cm" then
- -- local e = entry[1]
- -- local sx, rx, ry, sy, tx, ty = e[1], e[2], e[3], e[4], e[5], e[6]
- -- -- if dy ... newline
- -- end
- -- end
+ unic = nil -- can be collected
return list
end
+-- This is also an experiment. When I really neet it I can improve it, fo rinstance
+-- with proper position calculating. It might be usefull for some search or so.
+
+local softhyphen = utfchar(0xAD) .. "$"
+local linefactor = 1.3
+
+function lpdf.epdf.contenttotext(document,list) -- maybe signal fonts
+ local last_y = 0
+ local last_f = 0
+ local text = { }
+ local last = 0
+
+ for i=1,#list do
+ local entry = list[i]
+ local size = #entry
+ local operator = entry[size]
+ if operator == "Tf" then
+ last_f = entry[2]
+ elseif operator == "TJ" then
+ local list = entry[1]
+ for i=1,#list do
+ local li = list[i]
+ if type(li) == "string" then
+ last = last + 1
+ text[last] = li
+ elseif li < -50 then
+ last = last + 1
+ text[last] = " "
+ end
+ end
+ line = concat(list)
+ elseif operator == "Tj" then
+ last = last + 1
+ text[last] = entry[size-1]
+ elseif operator == "cm" or operator == "Tm" then
+ local ty = entry[6]
+ local dy = abs(last_y - ty)
+ if dy > linefactor*last_f then
+ if last > 0 then
+ if find(text[last],softhyphen) then
+ -- ignore
+ else
+ last = last + 1
+ text[last] = "\n"
+ end
+ end
+ end
+ last_y = ty
+ end
+ end
+
+ return concat(text)
+end
+
-- document.Catalog.StructTreeRoot.ParentTree.Nums[2][1].A.P[1])
-- helpers
diff --git a/tex/context/base/lpdf-fld.lua b/tex/context/base/lpdf-fld.lua
index 414562ad5..4f15b3c7b 100644
--- a/tex/context/base/lpdf-fld.lua
+++ b/tex/context/base/lpdf-fld.lua
@@ -280,10 +280,8 @@ end
local pdfdocencodingvector, pdfdocencodingcapsule
--- The pdf doc encoding vector is needed in order to
--- trigger propper unicode. Interesting is that when
--- a glyph is not in the vector, it is still visible
--- as it is taken from some other font. Messy.
+-- The pdf doc encoding vector is needed in order to trigger propper unicode. Interesting is that when
+-- a glyph is not in the vector, it is still visible as it is taken from some other font. Messy.
-- To be checked: only when text/line fields.
diff --git a/tex/context/base/lpdf-ini.lua b/tex/context/base/lpdf-ini.lua
index a4725c30e..76fa5cbb2 100644
--- a/tex/context/base/lpdf-ini.lua
+++ b/tex/context/base/lpdf-ini.lua
@@ -6,6 +6,8 @@ if not modules then modules = { } end modules ['lpdf-ini'] = {
license = "see context related readme files"
}
+-- beware of "too many locals" here
+
local setmetatable, getmetatable, type, next, tostring, tonumber, rawset = setmetatable, getmetatable, type, next, tostring, tonumber, rawset
local char, byte, format, gsub, concat, match, sub, gmatch = string.char, string.byte, string.format, string.gsub, table.concat, string.match, string.sub, string.gmatch
local utfchar, utfbyte, utfvalues = utf.char, utf.byte, utf.values
@@ -18,6 +20,10 @@ local report_objects = logs.reporter("backend","objects")
local report_finalizing = logs.reporter("backend","finalizing")
local report_blocked = logs.reporter("backend","blocked")
+-- In ConTeXt MkIV we use utf8 exclusively so all strings get mapped onto a hex
+-- encoded utf16 string type between <>. We could probably save some bytes by using
+-- strings between () but then we end up with escaped ()\ too.
+
-- gethpos : used
-- getpos : used
-- getvpos : used
@@ -227,55 +233,78 @@ local cache = table.setmetatableindex(function(t,k) -- can be made weak
return v
end)
-local p = Cs(Cc("<feff") * (lpeg.patterns.utf8character/cache)^1 * Cc(">"))
+local escaped = Cs(Cc("(") * (S("\\()")/"\\%0" + P(1))^0 * Cc(")"))
+local unified = Cs(Cc("<feff") * (lpeg.patterns.utf8character/cache)^1 * Cc(">"))
local function tosixteen(str) -- an lpeg might be faster (no table)
if not str or str == "" then
return "<feff>" -- not () as we want an indication that it's unicode
else
- return lpegmatch(p,str)
+ return lpegmatch(unified,str)
end
end
-lpdf.tosixteen = tosixteen
+local more = 0
+
+local pattern = C(4) / function(s) -- needs checking !
+ local now = tonumber(s,16)
+ if more > 0 then
+ now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+ more = 0
+ return utfchar(now)
+ elseif now >= 0xD800 and now <= 0xDBFF then
+ more = now
+ -- return ""
+ else
+ return utfchar(now)
+ end
+end
--- lpeg is some 5 times faster than gsub (in test) on escaping
+local pattern = P(true) / function() more = 0 end * Cs(pattern^0)
--- local escapes = {
--- ["\\"] = "\\\\",
--- ["/"] = "\\/", ["#"] = "\\#",
--- ["<"] = "\\<", [">"] = "\\>",
--- ["["] = "\\[", ["]"] = "\\]",
--- ["("] = "\\(", [")"] = "\\)",
--- }
---
--- local escaped = Cs(Cc("(") * (S("\\/#<>[]()")/escapes + P(1))^0 * Cc(")"))
---
--- local function toeight(str)
--- if not str or str == "" then
--- return "()"
--- else
--- return lpegmatch(escaped,str)
--- end
--- end
---
--- -- no need for escaping .. just use unicode instead
+local function fromsixteen(str)
+ if not str or str == "" then
+ return ""
+ else
+ return lpegmatch(pattern,str)
+ end
+end
--- \0 \t \n \r \f <space> ( ) [ ] { } / %
+local toregime = regimes.toregime
+local fromregime = regimes.fromregime
-local function toeight(str)
- return "(" .. str .. ")"
+local function topdfdoc(str,default)
+ if not str or str == "" then
+ return ""
+ else
+ return lpegmatch(escaped,toregime("pdfdoc",str,default)) -- could be combined if needed
+ end
end
-lpdf.toeight = toeight
+local function frompdfdoc(str)
+ if not str or str == "" then
+ return ""
+ else
+ return fromregime("pdfdoc",str)
+ end
+end
--- local escaped = lpeg.Cs((lpeg.S("\0\t\n\r\f ()[]{}/%")/function(s) return format("#%02X",byte(s)) end + lpeg.P(1))^0)
---
--- local function cleaned(str)
--- return (str and str ~= "" and lpegmatch(escaped,str)) or ""
--- end
---
--- lpdf.cleaned = cleaned -- not public yet
+if not toregime then topdfdoc = function(s) return s end end
+if not fromregime then frompdfdoc = function(s) return s end end
+
+local function toeight(str)
+ if not str or str == "" then
+ return "()"
+ else
+ return lpegmatch(escaped,str)
+ end
+end
+
+lpdf.tosixteen = tosixteen
+lpdf.toeight = toeight
+lpdf.topdfdoc = topdfdoc
+lpdf.fromsixteen = fromsixteen
+lpdf.frompdfdoc = frompdfdoc
local function merge_t(a,b)
local t = { }
@@ -310,8 +339,8 @@ tostring_d = function(t,contentonly,key)
r[rn] = f_key_value(k,toeight(v))
elseif tv == "number" then
r[rn] = f_key_number(k,v)
- elseif tv == "unicode" then
- r[rn] = f_key_value(k,tosixteen(v))
+ -- elseif tv == "unicode" then -- can't happen
+ -- r[rn] = f_key_value(k,tosixteen(v))
elseif tv == "table" then
local mv = getmetatable(v)
if mv and mv.__lpdftype then
@@ -350,8 +379,8 @@ tostring_a = function(t,contentonly,key)
r[k] = toeight(v)
elseif tv == "number" then
r[k] = f_tonumber(v)
- elseif tv == "unicode" then
- r[k] = tosixteen(v)
+ -- elseif tv == "unicode" then
+ -- r[k] = tosixteen(v)
elseif tv == "table" then
local mv = getmetatable(v)
local mt = mv and mv.__lpdftype
@@ -380,15 +409,16 @@ tostring_a = function(t,contentonly,key)
end
end
-local tostring_x = function(t) return concat(t," ") end
-local tostring_s = function(t) return toeight(t[1]) end
-local tostring_u = function(t) return tosixteen(t[1]) end
-local tostring_n = function(t) return tostring(t[1]) end -- tostring not needed
-local tostring_n = function(t) return f_tonumber(t[1]) end -- tostring not needed
-local tostring_c = function(t) return t[1] end -- already prefixed (hashed)
-local tostring_z = function() return "null" end
-local tostring_t = function() return "true" end
-local tostring_f = function() return "false" end
+local tostring_x = function(t) return concat(t," ") end
+local tostring_s = function(t) return toeight(t[1]) end
+local tostring_p = function(t) return topdfdoc(t[1],t[2]) end
+local tostring_u = function(t) return tosixteen(t[1]) end
+local tostring_n = function(t) return tostring(t[1]) end -- tostring not needed
+local tostring_n = function(t) return f_tonumber(t[1]) end -- tostring not needed
+local tostring_c = function(t) return t[1] end -- already prefixed (hashed)
+local tostring_z = function() return "null" end
+local tostring_t = function() return "true" end
+local tostring_f = function() return "false" end
local tostring_r = function(t) local n = t[1] return n and n > 0 and (n .. " 0 R") or "NULL" end
local tostring_v = function(t)
@@ -400,18 +430,19 @@ local tostring_v = function(t)
end
end
-local function value_x(t) return t end -- the call is experimental
-local function value_s(t,key) return t[1] end -- the call is experimental
-local function value_u(t,key) return t[1] end -- the call is experimental
-local function value_n(t,key) return t[1] end -- the call is experimental
-local function value_c(t) return sub(t[1],2) end -- the call is experimental
-local function value_d(t) return tostring_d(t,true) end -- the call is experimental
-local function value_a(t) return tostring_a(t,true) end -- the call is experimental
-local function value_z() return nil end -- the call is experimental
-local function value_t(t) return t.value or true end -- the call is experimental
-local function value_f(t) return t.value or false end -- the call is experimental
-local function value_r() return t[1] or 0 end -- the call is experimental -- NULL
-local function value_v() return t[1] end -- the call is experimental
+local function value_x(t) return t end
+local function value_s(t) return t[1] end
+local function value_p(t) return t[1] end
+local function value_u(t) return t[1] end
+local function value_n(t) return t[1] end
+local function value_c(t) return sub(t[1],2) end
+local function value_d(t) return tostring_d(t,true) end
+local function value_a(t) return tostring_a(t,true) end
+local function value_z() return nil end
+local function value_t(t) return t.value or true end
+local function value_f(t) return t.value or false end
+local function value_r() return t[1] or 0 end -- NULL
+local function value_v() return t[1] end
local function add_x(t,k,v) rawset(t,k,tostring(v)) end
@@ -420,6 +451,7 @@ local mt_d = { __lpdftype = "dictionary", __tostring = tostring_d, __call = valu
local mt_a = { __lpdftype = "array", __tostring = tostring_a, __call = value_a }
local mt_u = { __lpdftype = "unicode", __tostring = tostring_u, __call = value_u }
local mt_s = { __lpdftype = "string", __tostring = tostring_s, __call = value_s }
+local mt_p = { __lpdftype = "docstring", __tostring = tostring_p, __call = value_p }
local mt_n = { __lpdftype = "number", __tostring = tostring_n, __call = value_n }
local mt_c = { __lpdftype = "constant", __tostring = tostring_c, __call = value_c }
local mt_z = { __lpdftype = "null", __tostring = tostring_z, __call = value_z }
@@ -453,6 +485,10 @@ local function pdfstring(str,default)
return setmetatable({ str or default or "" },mt_s)
end
+local function pdfdocstring(str,default,defaultchar)
+ return setmetatable({ str or default or "", defaultchar or " " },mt_p)
+end
+
local function pdfunicode(str,default)
return setmetatable({ str or default or "" },mt_u) -- could be a string
end
@@ -538,6 +574,7 @@ end
lpdf.stream = pdfstream -- THIS WILL PROBABLY CHANGE
lpdf.dictionary = pdfdictionary
lpdf.array = pdfarray
+lpdf.docstring = pdfdocstring
lpdf.string = pdfstring
lpdf.unicode = pdfunicode
lpdf.number = pdfnumber
@@ -800,145 +837,147 @@ end
callbacks.register("finish_pdffile", lpdf.finalizedocument)
--- some minimal tracing, handy for checking the order
-local function trace_set(what,key)
- if trace_resources then
- report_finalizing("setting key %a in %a",key,what)
+do
+
+ -- some minimal tracing, handy for checking the order
+
+ local function trace_set(what,key)
+ if trace_resources then
+ report_finalizing("setting key %a in %a",key,what)
+ end
end
-end
-local function trace_flush(what)
- if trace_resources then
- report_finalizing("flushing %a",what)
+
+ local function trace_flush(what)
+ if trace_resources then
+ report_finalizing("flushing %a",what)
+ end
end
-end
-lpdf.protectresources = true
+ lpdf.protectresources = true
-local catalog = pdfdictionary { Type = pdfconstant("Catalog") } -- nicer, but when we assign we nil the Type
-local info = pdfdictionary { Type = pdfconstant("Info") } -- nicer, but when we assign we nil the Type
------ names = pdfdictionary { Type = pdfconstant("Names") } -- nicer, but when we assign we nil the Type
+ local catalog = pdfdictionary { Type = pdfconstant("Catalog") } -- nicer, but when we assign we nil the Type
+ local info = pdfdictionary { Type = pdfconstant("Info") } -- nicer, but when we assign we nil the Type
+ ----- names = pdfdictionary { Type = pdfconstant("Names") } -- nicer, but when we assign we nil the Type
-local function flushcatalog()
- if not environment.initex then
- trace_flush("catalog")
- catalog.Type = nil
- pdfsetcatalog(catalog())
+ local function flushcatalog()
+ if not environment.initex then
+ trace_flush("catalog")
+ catalog.Type = nil
+ pdfsetcatalog(catalog())
+ end
end
-end
-local function flushinfo()
- if not environment.initex then
- trace_flush("info")
- info.Type = nil
- pdfsetinfo(info())
+ local function flushinfo()
+ if not environment.initex then
+ trace_flush("info")
+ info.Type = nil
+ pdfsetinfo(info())
+ end
end
-end
-
--- local function flushnames()
--- if not environment.initex then
--- trace_flush("names")
--- names.Type = nil
--- pdfsetnames(names())
--- end
--- end
-function lpdf.addtocatalog(k,v)
- if not (lpdf.protectresources and catalog[k]) then
- trace_set("catalog",k)
- catalog[k] = v
+ -- local function flushnames()
+ -- if not environment.initex then
+ -- trace_flush("names")
+ -- names.Type = nil
+ -- pdfsetnames(names())
+ -- end
+ -- end
+
+ function lpdf.addtocatalog(k,v)
+ if not (lpdf.protectresources and catalog[k]) then
+ trace_set("catalog",k)
+ catalog[k] = v
+ end
end
-end
-function lpdf.addtoinfo(k,v)
- if not (lpdf.protectresources and info[k]) then
- trace_set("info",k)
- info[k] = v
+ function lpdf.addtoinfo(k,v)
+ if not (lpdf.protectresources and info[k]) then
+ trace_set("info",k)
+ info[k] = v
+ end
end
-end
--- local function lpdf.addtonames(k,v)
--- if not (lpdf.protectresources and names[k]) then
--- trace_set("names",k)
--- names[k] = v
--- end
--- end
+ -- local function lpdf.addtonames(k,v)
+ -- if not (lpdf.protectresources and names[k]) then
+ -- trace_set("names",k)
+ -- names[k] = v
+ -- end
+ -- end
-local names = pdfdictionary {
- -- Type = pdfconstant("Names")
-}
+ local names = pdfdictionary {
+ -- Type = pdfconstant("Names")
+ }
-local function flushnames()
- if next(names) and not environment.initex then
- names.Type = pdfconstant("Names")
- trace_flush("names")
- lpdf.addtocatalog("Names",pdfreference(pdfimmediateobject(tostring(names))))
+ local function flushnames()
+ if next(names) and not environment.initex then
+ names.Type = pdfconstant("Names")
+ trace_flush("names")
+ lpdf.addtocatalog("Names",pdfreference(pdfimmediateobject(tostring(names))))
+ end
end
-end
-function lpdf.addtonames(k,v)
- if not (lpdf.protectresources and names [k]) then
- trace_set("names", k)
- names [k] = v
+ function lpdf.addtonames(k,v)
+ if not (lpdf.protectresources and names[k]) then
+ trace_set("names", k)
+ names [k] = v
+ end
end
-end
-local dummy = pdfreserveobject() -- else bug in hvmd due so some internal luatex conflict
-
--- Some day I will implement a proper minimalized resource management.
-
-local r_extgstates, d_extgstates = pdfreserveobject(), pdfdictionary() local p_extgstates = pdfreference(r_extgstates)
-local r_colorspaces, d_colorspaces = pdfreserveobject(), pdfdictionary() local p_colorspaces = pdfreference(r_colorspaces)
-local r_patterns, d_patterns = pdfreserveobject(), pdfdictionary() local p_patterns = pdfreference(r_patterns)
-local r_shades, d_shades = pdfreserveobject(), pdfdictionary() local p_shades = pdfreference(r_shades)
-
-local function checkextgstates () if next(d_extgstates ) then addtopageresources("ExtGState", p_extgstates ) end end
-local function checkcolorspaces() if next(d_colorspaces) then addtopageresources("ColorSpace",p_colorspaces) end end
-local function checkpatterns () if next(d_patterns ) then addtopageresources("Pattern", p_patterns ) end end
-local function checkshades () if next(d_shades ) then addtopageresources("Shading", p_shades ) end end
-
-local function flushextgstates () if next(d_extgstates ) then trace_flush("extgstates") pdfimmediateobject(r_extgstates, tostring(d_extgstates )) end end
-local function flushcolorspaces() if next(d_colorspaces) then trace_flush("colorspaces") pdfimmediateobject(r_colorspaces,tostring(d_colorspaces)) end end
-local function flushpatterns () if next(d_patterns ) then trace_flush("patterns") pdfimmediateobject(r_patterns, tostring(d_patterns )) end end
-local function flushshades () if next(d_shades ) then trace_flush("shades") pdfimmediateobject(r_shades, tostring(d_shades )) end end
-
-function lpdf.collectedresources()
- local ExtGState = next(d_extgstates ) and p_extgstates
- local ColorSpace = next(d_colorspaces) and p_colorspaces
- local Pattern = next(d_patterns ) and p_patterns
- local Shading = next(d_shades ) and p_shades
- if ExtGState or ColorSpace or Pattern or Shading then
- local collected = pdfdictionary {
- ExtGState = ExtGState,
- ColorSpace = ColorSpace,
- Pattern = Pattern,
- Shading = Shading,
- -- ProcSet = pdfarray { pdfconstant("PDF") },
- }
- return collected()
- else
- return ""
+ local r_extgstates, d_extgstates = pdfreserveobject(), pdfdictionary() local p_extgstates = pdfreference(r_extgstates)
+ local r_colorspaces, d_colorspaces = pdfreserveobject(), pdfdictionary() local p_colorspaces = pdfreference(r_colorspaces)
+ local r_patterns, d_patterns = pdfreserveobject(), pdfdictionary() local p_patterns = pdfreference(r_patterns)
+ local r_shades, d_shades = pdfreserveobject(), pdfdictionary() local p_shades = pdfreference(r_shades)
+
+ local function checkextgstates () if next(d_extgstates ) then addtopageresources("ExtGState", p_extgstates ) end end
+ local function checkcolorspaces() if next(d_colorspaces) then addtopageresources("ColorSpace",p_colorspaces) end end
+ local function checkpatterns () if next(d_patterns ) then addtopageresources("Pattern", p_patterns ) end end
+ local function checkshades () if next(d_shades ) then addtopageresources("Shading", p_shades ) end end
+
+ local function flushextgstates () if next(d_extgstates ) then trace_flush("extgstates") pdfimmediateobject(r_extgstates, tostring(d_extgstates )) end end
+ local function flushcolorspaces() if next(d_colorspaces) then trace_flush("colorspaces") pdfimmediateobject(r_colorspaces,tostring(d_colorspaces)) end end
+ local function flushpatterns () if next(d_patterns ) then trace_flush("patterns") pdfimmediateobject(r_patterns, tostring(d_patterns )) end end
+ local function flushshades () if next(d_shades ) then trace_flush("shades") pdfimmediateobject(r_shades, tostring(d_shades )) end end
+
+ function lpdf.collectedresources()
+ local ExtGState = next(d_extgstates ) and p_extgstates
+ local ColorSpace = next(d_colorspaces) and p_colorspaces
+ local Pattern = next(d_patterns ) and p_patterns
+ local Shading = next(d_shades ) and p_shades
+ if ExtGState or ColorSpace or Pattern or Shading then
+ local collected = pdfdictionary {
+ ExtGState = ExtGState,
+ ColorSpace = ColorSpace,
+ Pattern = Pattern,
+ Shading = Shading,
+ -- ProcSet = pdfarray { pdfconstant("PDF") },
+ }
+ return collected()
+ else
+ return ""
+ end
end
-end
-function lpdf.adddocumentextgstate (k,v) d_extgstates [k] = v end
-function lpdf.adddocumentcolorspace(k,v) d_colorspaces[k] = v end
-function lpdf.adddocumentpattern (k,v) d_patterns [k] = v end
-function lpdf.adddocumentshade (k,v) d_shades [k] = v end
+ function lpdf.adddocumentextgstate (k,v) d_extgstates [k] = v end
+ function lpdf.adddocumentcolorspace(k,v) d_colorspaces[k] = v end
+ function lpdf.adddocumentpattern (k,v) d_patterns [k] = v end
+ function lpdf.adddocumentshade (k,v) d_shades [k] = v end
+
+ registerdocumentfinalizer(flushextgstates,3,"extended graphic states")
+ registerdocumentfinalizer(flushcolorspaces,3,"color spaces")
+ registerdocumentfinalizer(flushpatterns,3,"patterns")
+ registerdocumentfinalizer(flushshades,3,"shades")
-registerdocumentfinalizer(flushextgstates,3,"extended graphic states")
-registerdocumentfinalizer(flushcolorspaces,3,"color spaces")
-registerdocumentfinalizer(flushpatterns,3,"patterns")
-registerdocumentfinalizer(flushshades,3,"shades")
+ registerdocumentfinalizer(flushnames,3,"names") -- before catalog
+ registerdocumentfinalizer(flushcatalog,3,"catalog")
+ registerdocumentfinalizer(flushinfo,3,"info")
-registerdocumentfinalizer(flushnames,3,"names") -- before catalog
-registerdocumentfinalizer(flushcatalog,3,"catalog")
-registerdocumentfinalizer(flushinfo,3,"info")
+ registerpagefinalizer(checkextgstates,3,"extended graphic states")
+ registerpagefinalizer(checkcolorspaces,3,"color spaces")
+ registerpagefinalizer(checkpatterns,3,"patterns")
+ registerpagefinalizer(checkshades,3,"shades")
-registerpagefinalizer(checkextgstates,3,"extended graphic states")
-registerpagefinalizer(checkcolorspaces,3,"color spaces")
-registerpagefinalizer(checkpatterns,3,"patterns")
-registerpagefinalizer(checkshades,3,"shades")
+end
-- in strc-bkm: lpdf.registerdocumentfinalizer(function() structures.bookmarks.place() end,1)
@@ -949,19 +988,23 @@ end
-- ! -> universaltime
-local timestamp = os.date("%Y-%m-%dT%X") .. os.timezone(true)
+do
-function lpdf.timestamp()
- return timestamp
-end
+ local timestamp = os.date("%Y-%m-%dT%X") .. os.timezone(true)
-function lpdf.pdftimestamp(str)
- local Y, M, D, h, m, s, Zs, Zh, Zm = match(str,"^(%d%d%d%d)%-(%d%d)%-(%d%d)T(%d%d):(%d%d):(%d%d)([%+%-])(%d%d):(%d%d)$")
- return Y and format("D:%s%s%s%s%s%s%s%s'%s'",Y,M,D,h,m,s,Zs,Zh,Zm)
-end
+ function lpdf.timestamp()
+ return timestamp
+ end
+
+ function lpdf.pdftimestamp(str)
+ local Y, M, D, h, m, s, Zs, Zh, Zm = match(str,"^(%d%d%d%d)%-(%d%d)%-(%d%d)T(%d%d):(%d%d):(%d%d)([%+%-])(%d%d):(%d%d)$")
+ return Y and format("D:%s%s%s%s%s%s%s%s'%s'",Y,M,D,h,m,s,Zs,Zh,Zm)
+ end
+
+ function lpdf.id()
+ return format("%s.%s",tex.jobname,timestamp)
+ end
-function lpdf.id()
- return format("%s.%s",tex.jobname,timestamp)
end
-- return nil is nicer in test prints
@@ -1104,25 +1147,29 @@ end
-- return formatters["BT /Span << /ActualText (CONTEXT) >> BDC [<feff>] TJ % t EMC ET"](code)
-local f_actual_text_one = formatters["BT /Span << /ActualText <feff%04x> >> BDC [<feff>] TJ %s EMC ET"]
-local f_actual_text_two = formatters["BT /Span << /ActualText <feff%04x%04x> >> BDC [<feff>] TJ %s EMC ET"]
-local f_actual_text = formatters["/Span <</ActualText %s >> BDC"]
+do
-local context = context
-local pdfdirect = nodes.pool.pdfdirect
+ local f_actual_text_one = formatters["BT /Span << /ActualText <feff%04x> >> BDC [<feff>] TJ %s EMC ET"]
+ local f_actual_text_two = formatters["BT /Span << /ActualText <feff%04x%04x> >> BDC [<feff>] TJ %s EMC ET"]
+ local f_actual_text = formatters["/Span <</ActualText %s >> BDC"]
-function codeinjections.unicodetoactualtext(unicode,pdfcode)
- if unicode < 0x10000 then
- return f_actual_text_one(unicode,pdfcode)
- else
- return f_actual_text_two(unicode/1024+0xD800,unicode%1024+0xDC00,pdfcode)
+ local context = context
+ local pdfdirect = nodes.pool.pdfdirect
+
+ function codeinjections.unicodetoactualtext(unicode,pdfcode)
+ if unicode < 0x10000 then
+ return f_actual_text_one(unicode,pdfcode)
+ else
+ return f_actual_text_two(unicode/1024+0xD800,unicode%1024+0xDC00,pdfcode)
+ end
end
-end
-function commands.startactualtext(str)
- context(pdfdirect(f_actual_text(tosixteen(str))))
-end
+ function commands.startactualtext(str)
+ context(pdfdirect(f_actual_text(tosixteen(str))))
+ end
+
+ function commands.stopactualtext()
+ context(pdfdirect("EMC"))
+ end
-function commands.stopactualtext()
- context(pdfdirect("EMC"))
end
diff --git a/tex/context/base/regi-ini.lua b/tex/context/base/regi-ini.lua
index 9484db7c7..c0a23cf42 100644
--- a/tex/context/base/regi-ini.lua
+++ b/tex/context/base/regi-ini.lua
@@ -15,7 +15,7 @@ runtime.</p>
local commands, context = commands, context
local utfchar = utf.char
-local P, Cs, lpegmatch = lpeg.P, lpeg.Cs, lpeg.match
+local P, Cs, Cc, lpegmatch = lpeg.P, lpeg.Cs, lpeg.Cc, lpeg.match
local char, gsub, format, gmatch, byte, match = string.char, string.gsub, string.format, string.gmatch, string.byte, string.match
local next = next
local insert, remove, fastcopy = table.insert, table.remove, table.fastcopy
@@ -99,6 +99,8 @@ local synonyms = { -- backward compatibility list
["windows"] = "cp1252",
+ ["pdf"] = "pdfdoc",
+
}
local currentregime = "utf"
@@ -132,7 +134,7 @@ end
setmetatableindex(mapping, loadregime)
setmetatableindex(backmapping,loadreverse)
-local function translate(line,regime)
+local function fromregime(regime,line)
if line and #line > 0 then
local map = mapping[regime and synonyms[regime] or regime or currentregime]
if map then
@@ -178,12 +180,15 @@ local function toregime(vector,str,default) -- toregime('8859-1',"abcde Ä","?")
local r = c[d]
if not r then
local t = fastcopy(backmapping[vector])
- setmetatableindex(t, function(t,k)
- local v = d
- t[k] = v
- return v
- end)
- r = utf.remapper(t)
+ -- r = utf.remapper(t) -- not good for defaults here
+ local pattern = Cs((lpeg.utfchartabletopattern(t)/t + lpeg.patterns.utf8character/d + P(1)/d)^0)
+ r = function(str)
+ if not str or str == "" then
+ return ""
+ else
+ return lpegmatch(pattern,str)
+ end
+ end
c[d] = r
end
return r(str)
@@ -204,10 +209,11 @@ local function enable(regime)
end
end
-regimes.toregime = toregime
-regimes.translate = translate
-regimes.enable = enable
-regimes.disable = disable
+regimes.toregime = toregime
+regimes.fromregime = fromregime
+regimes.translate = function(str,regime) return fromregime(regime,str) end
+regimes.enable = enable
+regimes.disable = disable
-- The following function can be used when we want to make sure that
-- utf gets passed unharmed. This is needed for modules.
@@ -216,7 +222,7 @@ local level = 0
function regimes.process(str,filename,currentline,noflines,coding)
if level == 0 and coding ~= "utf-8" then
- str = translate(str,currentregime)
+ str = fromregime(currentregime,str)
if trace_translating then
report_translating("utf: %s",str)
end
@@ -403,5 +409,5 @@ end
-- local new = regimes.cleanup("cp1252",old)
-- report_translating("%s -> %s",old,new)
-- local old = "Pozn" .. char(0xE1) .. "mky"
--- local new = translate(old,"cp1250")
+-- local new = fromregime("cp1250",old)
-- report_translating("%s -> %s",old,new)
diff --git a/tex/context/base/regi-pdfdoc.lua b/tex/context/base/regi-pdfdoc.lua
new file mode 100644
index 000000000..363d3ae0d
--- /dev/null
+++ b/tex/context/base/regi-pdfdoc.lua
@@ -0,0 +1,26 @@
+if not modules then modules = { } end modules ['regi-pdfdoc'] = {
+ version = 1.001,
+ comment = "companion to regi-ini.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+return { [0] =
+ 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, 0x0010,
+ 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x02D8, 0x02C7, 0x02C6, 0x02D9, 0x02DD, 0x02DB, 0x02DA, 0x02DC, 0x001F,
+ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
+ 0x2022, 0x2020, 0x2021, 0x2026, 0x2014, 0x2013, 0x0192, 0x2044, 0x2039, 0x203A, 0x2212, 0x2030, 0x201E, 0x201C, 0x201D, 0x2018,
+ 0x2019, 0x201A, 0x2122, 0xFB01, 0xFB02, 0x0141, 0x0152, 0x0160, 0x0178, 0x017D, 0x0131, 0x0142, 0x0153, 0x0161, 0x017E, 0x009F,
+ 0x20AC, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0xFFFD, 0x00AE, 0x00AF,
+ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
+ 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+ 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
+ 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+ 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF,
+}
diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf
index 51d01e48d..7fb1ecac9 100644
--- a/tex/context/base/status-files.pdf
+++ b/tex/context/base/status-files.pdf
Binary files differ
diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf
index f4d5e92d4..a9ca8b459 100644
--- a/tex/context/base/status-lua.pdf
+++ b/tex/context/base/status-lua.pdf
Binary files differ
diff --git a/tex/context/base/strc-bkm.mkiv b/tex/context/base/strc-bkm.mkiv
index 9d2ebd796..5f1acb686 100644
--- a/tex/context/base/strc-bkm.mkiv
+++ b/tex/context/base/strc-bkm.mkiv
@@ -127,6 +127,38 @@
}}%
\to \everysetupbookmark
+%D There is a plugin mechanism but this is for experts only. The intermediate
+%D data structures are stable.
+%D
+%D \starttyping
+%D \startluacode
+%D structures.bookmarks.installhandler("check before","before",function(levels)
+%D logs.report("extra bookmarks","before (normal bookmarks)")
+%D inspect(levels)
+%D logs.report("extra bookmarks","before (extra bookmarks)")
+%D inspect(structures.bookmarks.extras.get())
+%D return levels
+%D end)
+%D structures.bookmarks.installhandler("check after", "after", function(levels)
+%D logs.report("extra bookmarks","after (merged bookmarks)")
+%D inspect(levels)
+%D return levels
+%D end)
+%D \stopluacode
+%D \starttyping
+%D
+%D This mechanism was added when bookmark inclusion became (optional) part of graphic
+%D inclusion (which is needed by Taco).
+%D
+%D \starttyping
+%D \getfiguredimensions[somefile.pdf]
+%D \dorecurse {\noffigurepages} {
+%D \startTEXpage
+%D \externalfigure[somefile.pdf][interaction=bookmark,page=\recurselevel]
+%D \stopTEXpage
+%D }
+%D \starttyping
+
\protect \endinput
% \starttext
diff --git a/tex/context/base/supp-box.lua b/tex/context/base/supp-box.lua
index 3c5a3383d..c69486306 100644
--- a/tex/context/base/supp-box.lua
+++ b/tex/context/base/supp-box.lua
@@ -42,9 +42,11 @@ local setfield = nuts.setfield
local setbox = nuts.setbox
local free_node = nuts.free
-local copy_list = nuts.copy_list
+local flush_list = nuts.flush_list
local copy_node = nuts.copy
+local copy_list = nuts.copy_list
local find_tail = nuts.tail
+local traverse_id = nuts.traverse_id
local listtoutf = nodes.listtoutf
@@ -84,6 +86,19 @@ end
commands.hyphenatedlist = hyphenatedlist
+-- local function hyphenatedhack(head,pre)
+-- pre = tonut(pre)
+-- for n in traverse_id(disc_code,tonut(head)) do
+-- local hyphen = getfield(n,"pre")
+-- if hyphen then
+-- flush_list(hyphen)
+-- end
+-- setfield(n,"pre",copy_list(pre))
+-- end
+-- end
+--
+-- commands.hyphenatedhack = hyphenatedhack
+
function commands.showhyphenatedinlist(list)
report_hyphenation("show: %s",listtoutf(tonut(list),false,true))
end
diff --git a/tex/context/base/supp-box.mkiv b/tex/context/base/supp-box.mkiv
index 66f373b72..bc1e30749 100644
--- a/tex/context/base/supp-box.mkiv
+++ b/tex/context/base/supp-box.mkiv
@@ -1063,7 +1063,7 @@
%D \showhyphens{dohyphenatedword}
%D \stoptyping
-\def\doshowhyphenatednextbox
+\unexpanded\def\doshowhyphenatednextbox
{\ctxcommand{showhyphenatedinlist(tex.box[\number\nextbox].list)}}
\unexpanded\def\showhyphens{\dowithnextboxcs\doshowhyphenatednextbox\hbox}
@@ -1076,7 +1076,7 @@
%D \hyphenatedfile{tufte}
%D \stoptyping
-\def\dohyphenatednextbox
+\unexpanded\def\dohyphenatednextbox
{\ctxcommand{hyphenatedlist(tex.box[\number\nextbox].list)}%
\unhbox\nextbox}
@@ -1084,6 +1084,20 @@
\unexpanded\def\hyphenatedpar {\dowithnextboxcs\dohyphenatednextbox\hbox}
\unexpanded\def\hyphenatedfile#1{\dowithnextboxcs\dohyphenatednextbox\hbox{\readfile{#1}\donothing\donothing}}
+% D \starttyping
+% D \hyphenatedhack{\kern-.25em_}{alongword}
+% D \stoptyping
+%
+% \unexpanded\def\dohyphenatedhackbox
+% {\ctxcommand{hyphenatedhack(tex.box[\number\nextbox].list,tex.box[\number\scratchbox].list)}%
+% \unhbox\nextbox
+% \endgroup}
+%
+% \unexpanded\def\hyphenatedhack#1% the result of a test, not that useful
+% {\begingroup
+% \setbox\scratchbox\hbox{#1}% only chars and kerns !
+% \dowithnextboxcs\dohyphenatedhackbox\hbox}
+
%D \macros
%D {processtokens}
%D