diff options
Diffstat (limited to 'tex/context/base/font-map.lua')
-rw-r--r-- | tex/context/base/font-map.lua | 658 |
1 files changed, 329 insertions, 329 deletions
diff --git a/tex/context/base/font-map.lua b/tex/context/base/font-map.lua index 6988b9b9e..864b43c24 100644 --- a/tex/context/base/font-map.lua +++ b/tex/context/base/font-map.lua @@ -1,329 +1,329 @@ -if not modules then modules = { } end modules ['font-map'] = { - version = 1.001, - comment = "companion to font-ini.mkiv", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} - -local tonumber = tonumber - -local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower -local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match -local utfbyte = utf.byte -local floor = math.floor - -local trace_loading = false trackers.register("fonts.loading", function(v) trace_loading = v end) -local trace_mapping = false trackers.register("fonts.mapping", function(v) trace_unimapping = v end) - -local report_fonts = logs.reporter("fonts","loading") -- not otf only - -local fonts = fonts or { } -local mappings = fonts.mappings or { } -fonts.mappings = mappings - ---[[ldx-- -<p>Eventually this code will disappear because map files are kind -of obsolete. Some code may move to runtime or auxiliary modules.</p> -<p>The name to unciode related code will stay of course.</p> ---ldx]]-- - -local function loadlumtable(filename) -- will move to font goodies - local lumname = file.replacesuffix(file.basename(filename),"lum") - local lumfile = resolvers.findfile(lumname,"map") or "" - if lumfile ~= "" and lfs.isfile(lumfile) then - if trace_loading or trace_mapping then - report_fonts("loading map table %a",lumfile) - end - lumunic = dofile(lumfile) - return lumunic, lumfile - end -end - -local hex = R("AF","09") -local hexfour = (hex*hex*hex*hex) / function(s) return tonumber(s,16) end -local hexsix = (hex*hex*hex*hex*hex*hex) / function(s) return tonumber(s,16) end -local dec = (R("09")^1) / tonumber -local period = P(".") -local unicode = P("uni") * (hexfour * (period + P(-1)) * Cc(false) + Ct(hexfour^1) * Cc(true)) -local ucode = P("u") * (hexsix * (period + P(-1)) * Cc(false) + Ct(hexsix ^1) * Cc(true)) -local index = P("index") * dec * Cc(false) - -local parser = unicode + ucode + index - -local parsers = { } - -local function makenameparser(str) - if not str or str == "" then - return parser - else - local p = parsers[str] - if not p then - p = P(str) * period * dec * Cc(false) - parsers[str] = p - end - return p - end -end - --- local parser = makenameparser("Japan1") --- local parser = makenameparser() --- local function test(str) --- local b, a = lpegmatch(parser,str) --- print((a and table.serialize(b)) or b) --- end --- test("a.sc") --- test("a") --- test("uni1234") --- test("uni1234.xx") --- test("uni12349876") --- test("u123400987600") --- test("index1234") --- test("Japan1.123") - -local function tounicode16(unicode,name) - if unicode < 0x10000 then - return format("%04X",unicode) - elseif unicode < 0x1FFFFFFFFF then - return format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00) - else - report_fonts("can't convert %a in %a into tounicode",unicode,name) - end -end - -local function tounicode16sequence(unicodes,name) - local t = { } - for l=1,#unicodes do - local unicode = unicodes[l] - if unicode < 0x10000 then - t[l] = format("%04X",unicode) - elseif unicode < 0x1FFFFFFFFF then - t[l] = format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00) - else - report_fonts ("can't convert %a in %a into tounicode",unicode,name) - end - end - return concat(t) -end - -local function fromunicode16(str) - if #str == 4 then - return tonumber(str,16) - else - local l, r = match(str,"(....)(....)") - return (tonumber(l,16))*0x400 + tonumber(r,16) - 0xDC00 - end -end - --- Slightly slower: --- --- local p = C(4) * (C(4)^-1) / function(l,r) --- if r then --- return (tonumber(l,16))*0x400 + tonumber(r,16) - 0xDC00 --- else --- return tonumber(l,16) --- end --- end --- --- local function fromunicode16(str) --- return lpegmatch(p,str) --- end - --- This is quite a bit faster but at the cost of some memory but if we --- do this we will also use it elsewhere so let's not follow this route --- now. I might use this method in the plain variant (no caching there) --- but then I need a flag that distinguishes between code branches. --- --- local cache = { } --- --- function mappings.tounicode16(unicode) --- local s = cache[unicode] --- if not s then --- if unicode < 0x10000 then --- s = format("%04X",unicode) --- else --- s = format("%04X%04X",unicode/0x400+0xD800,unicode%0x400+0xDC00) --- end --- cache[unicode] = s --- end --- return s --- end - -mappings.loadlumtable = loadlumtable -mappings.makenameparser = makenameparser -mappings.tounicode16 = tounicode16 -mappings.tounicode16sequence = tounicode16sequence -mappings.fromunicode16 = fromunicode16 - -local separator = S("_.") -local other = C((1 - separator)^1) -local ligsplitter = Ct(other * (separator * other)^0) - ---~ print(table.serialize(lpegmatch(ligsplitter,"this"))) ---~ print(table.serialize(lpegmatch(ligsplitter,"this.that"))) ---~ print(table.serialize(lpegmatch(ligsplitter,"japan1.123"))) ---~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more"))) ---~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more.that"))) - -function mappings.addtounicode(data,filename) - local resources = data.resources - local properties = data.properties - local descriptions = data.descriptions - local unicodes = resources.unicodes - if not unicodes then - return - end - -- we need to move this code - unicodes['space'] = unicodes['space'] or 32 - unicodes['hyphen'] = unicodes['hyphen'] or 45 - unicodes['zwj'] = unicodes['zwj'] or 0x200D - unicodes['zwnj'] = unicodes['zwnj'] or 0x200C - -- the tounicode mapping is sparse and only needed for alternatives - local private = fonts.constructors.privateoffset - local unknown = format("%04X",utfbyte("?")) - local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context - local tounicode = { } - local originals = { } - resources.tounicode = tounicode - resources.originals = originals - local lumunic, uparser, oparser - local cidinfo, cidnames, cidcodes, usedmap - if false then -- will become an option - lumunic = loadlumtable(filename) - lumunic = lumunic and lumunic.tounicode - end - -- - cidinfo = properties.cidinfo - usedmap = cidinfo and fonts.cid.getmap(cidinfo) - -- - if usedmap then - oparser = usedmap and makenameparser(cidinfo.ordering) - cidnames = usedmap.names - cidcodes = usedmap.unicodes - end - uparser = makenameparser() - local ns, nl = 0, 0 - for unic, glyph in next, descriptions do - local index = glyph.index - local name = glyph.name - if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then - local unicode = lumunic and lumunic[name] or unicodevector[name] - if unicode then - originals[index] = unicode - tounicode[index] = tounicode16(unicode,name) - ns = ns + 1 - end - -- cidmap heuristics, beware, there is no guarantee for a match unless - -- the chain resolves - if (not unicode) and usedmap then - local foundindex = lpegmatch(oparser,name) - if foundindex then - unicode = cidcodes[foundindex] -- name to number - if unicode then - originals[index] = unicode - tounicode[index] = tounicode16(unicode,name) - ns = ns + 1 - else - local reference = cidnames[foundindex] -- number to name - if reference then - local foundindex = lpegmatch(oparser,reference) - if foundindex then - unicode = cidcodes[foundindex] - if unicode then - originals[index] = unicode - tounicode[index] = tounicode16(unicode,name) - ns = ns + 1 - end - end - if not unicode or unicode == "" then - local foundcodes, multiple = lpegmatch(uparser,reference) - if foundcodes then - originals[index] = foundcodes - if multiple then - tounicode[index] = tounicode16sequence(foundcodes) - nl = nl + 1 - unicode = true - else - tounicode[index] = tounicode16(foundcodes,name) - ns = ns + 1 - unicode = foundcodes - end - end - end - end - end - end - end - -- a.whatever or a_b_c.whatever or a_b_c (no numbers) - if not unicode or unicode == "" then - local split = lpegmatch(ligsplitter,name) - local nplit = split and #split or 0 - if nplit >= 2 then - local t, n = { }, 0 - for l=1,nplit do - local base = split[l] - local u = unicodes[base] or unicodevector[base] - if not u then - break - elseif type(u) == "table" then - n = n + 1 - t[n] = u[1] - else - n = n + 1 - t[n] = u - end - end - if n == 0 then -- done then - -- nothing - elseif n == 1 then - originals[index] = t[1] - tounicode[index] = tounicode16(t[1],name) - else - originals[index] = t - tounicode[index] = tounicode16sequence(t) - end - nl = nl + 1 - unicode = true - else - -- skip: already checked and we don't want privates here - end - end - -- last resort (we might need to catch private here as well) - if not unicode or unicode == "" then - local foundcodes, multiple = lpegmatch(uparser,name) - if foundcodes then - if multiple then - originals[index] = foundcodes - tounicode[index] = tounicode16sequence(foundcodes,name) - nl = nl + 1 - unicode = true - else - originals[index] = foundcodes - tounicode[index] = tounicode16(foundcodes,name) - ns = ns + 1 - unicode = foundcodes - end - end - end - -- if not unicode then - -- originals[index] = 0xFFFD - -- tounicode[index] = "FFFD" - -- end - end - end - if trace_mapping then - for unic, glyph in table.sortedhash(descriptions) do - local name = glyph.name - local index = glyph.index - local toun = tounicode[index] - if toun then - report_fonts("internal slot %U, name %a, unicode %U, tounicode %a",index,name,unic,toun) - else - report_fonts("internal slot %U, name %a, unicode %U",index,name,unic) - end - end - end - if trace_loading and (ns > 0 or nl > 0) then - report_fonts("%s tounicode entries added, ligatures %s",nl+ns,ns) - end -end +if not modules then modules = { } end modules ['font-map'] = {
+ version = 1.001,
+ comment = "companion to font-ini.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local tonumber = tonumber
+
+local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower
+local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match
+local utfbyte = utf.byte
+local floor = math.floor
+
+local trace_loading = false trackers.register("fonts.loading", function(v) trace_loading = v end)
+local trace_mapping = false trackers.register("fonts.mapping", function(v) trace_unimapping = v end)
+
+local report_fonts = logs.reporter("fonts","loading") -- not otf only
+
+local fonts = fonts or { }
+local mappings = fonts.mappings or { }
+fonts.mappings = mappings
+
+--[[ldx--
+<p>Eventually this code will disappear because map files are kind
+of obsolete. Some code may move to runtime or auxiliary modules.</p>
+<p>The name to unciode related code will stay of course.</p>
+--ldx]]--
+
+local function loadlumtable(filename) -- will move to font goodies
+ local lumname = file.replacesuffix(file.basename(filename),"lum")
+ local lumfile = resolvers.findfile(lumname,"map") or ""
+ if lumfile ~= "" and lfs.isfile(lumfile) then
+ if trace_loading or trace_mapping then
+ report_fonts("loading map table %a",lumfile)
+ end
+ lumunic = dofile(lumfile)
+ return lumunic, lumfile
+ end
+end
+
+local hex = R("AF","09")
+local hexfour = (hex*hex*hex*hex) / function(s) return tonumber(s,16) end
+local hexsix = (hex*hex*hex*hex*hex*hex) / function(s) return tonumber(s,16) end
+local dec = (R("09")^1) / tonumber
+local period = P(".")
+local unicode = P("uni") * (hexfour * (period + P(-1)) * Cc(false) + Ct(hexfour^1) * Cc(true))
+local ucode = P("u") * (hexsix * (period + P(-1)) * Cc(false) + Ct(hexsix ^1) * Cc(true))
+local index = P("index") * dec * Cc(false)
+
+local parser = unicode + ucode + index
+
+local parsers = { }
+
+local function makenameparser(str)
+ if not str or str == "" then
+ return parser
+ else
+ local p = parsers[str]
+ if not p then
+ p = P(str) * period * dec * Cc(false)
+ parsers[str] = p
+ end
+ return p
+ end
+end
+
+-- local parser = makenameparser("Japan1")
+-- local parser = makenameparser()
+-- local function test(str)
+-- local b, a = lpegmatch(parser,str)
+-- print((a and table.serialize(b)) or b)
+-- end
+-- test("a.sc")
+-- test("a")
+-- test("uni1234")
+-- test("uni1234.xx")
+-- test("uni12349876")
+-- test("u123400987600")
+-- test("index1234")
+-- test("Japan1.123")
+
+local function tounicode16(unicode,name)
+ if unicode < 0x10000 then
+ return format("%04X",unicode)
+ elseif unicode < 0x1FFFFFFFFF then
+ return format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00)
+ else
+ report_fonts("can't convert %a in %a into tounicode",unicode,name)
+ end
+end
+
+local function tounicode16sequence(unicodes,name)
+ local t = { }
+ for l=1,#unicodes do
+ local unicode = unicodes[l]
+ if unicode < 0x10000 then
+ t[l] = format("%04X",unicode)
+ elseif unicode < 0x1FFFFFFFFF then
+ t[l] = format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00)
+ else
+ report_fonts ("can't convert %a in %a into tounicode",unicode,name)
+ end
+ end
+ return concat(t)
+end
+
+local function fromunicode16(str)
+ if #str == 4 then
+ return tonumber(str,16)
+ else
+ local l, r = match(str,"(....)(....)")
+ return (tonumber(l,16))*0x400 + tonumber(r,16) - 0xDC00
+ end
+end
+
+-- Slightly slower:
+--
+-- local p = C(4) * (C(4)^-1) / function(l,r)
+-- if r then
+-- return (tonumber(l,16))*0x400 + tonumber(r,16) - 0xDC00
+-- else
+-- return tonumber(l,16)
+-- end
+-- end
+--
+-- local function fromunicode16(str)
+-- return lpegmatch(p,str)
+-- end
+
+-- This is quite a bit faster but at the cost of some memory but if we
+-- do this we will also use it elsewhere so let's not follow this route
+-- now. I might use this method in the plain variant (no caching there)
+-- but then I need a flag that distinguishes between code branches.
+--
+-- local cache = { }
+--
+-- function mappings.tounicode16(unicode)
+-- local s = cache[unicode]
+-- if not s then
+-- if unicode < 0x10000 then
+-- s = format("%04X",unicode)
+-- else
+-- s = format("%04X%04X",unicode/0x400+0xD800,unicode%0x400+0xDC00)
+-- end
+-- cache[unicode] = s
+-- end
+-- return s
+-- end
+
+mappings.loadlumtable = loadlumtable
+mappings.makenameparser = makenameparser
+mappings.tounicode16 = tounicode16
+mappings.tounicode16sequence = tounicode16sequence
+mappings.fromunicode16 = fromunicode16
+
+local separator = S("_.")
+local other = C((1 - separator)^1)
+local ligsplitter = Ct(other * (separator * other)^0)
+
+--~ print(table.serialize(lpegmatch(ligsplitter,"this")))
+--~ print(table.serialize(lpegmatch(ligsplitter,"this.that")))
+--~ print(table.serialize(lpegmatch(ligsplitter,"japan1.123")))
+--~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more")))
+--~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more.that")))
+
+function mappings.addtounicode(data,filename)
+ local resources = data.resources
+ local properties = data.properties
+ local descriptions = data.descriptions
+ local unicodes = resources.unicodes
+ if not unicodes then
+ return
+ end
+ -- we need to move this code
+ unicodes['space'] = unicodes['space'] or 32
+ unicodes['hyphen'] = unicodes['hyphen'] or 45
+ unicodes['zwj'] = unicodes['zwj'] or 0x200D
+ unicodes['zwnj'] = unicodes['zwnj'] or 0x200C
+ -- the tounicode mapping is sparse and only needed for alternatives
+ local private = fonts.constructors.privateoffset
+ local unknown = format("%04X",utfbyte("?"))
+ local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context
+ local tounicode = { }
+ local originals = { }
+ resources.tounicode = tounicode
+ resources.originals = originals
+ local lumunic, uparser, oparser
+ local cidinfo, cidnames, cidcodes, usedmap
+ if false then -- will become an option
+ lumunic = loadlumtable(filename)
+ lumunic = lumunic and lumunic.tounicode
+ end
+ --
+ cidinfo = properties.cidinfo
+ usedmap = cidinfo and fonts.cid.getmap(cidinfo)
+ --
+ if usedmap then
+ oparser = usedmap and makenameparser(cidinfo.ordering)
+ cidnames = usedmap.names
+ cidcodes = usedmap.unicodes
+ end
+ uparser = makenameparser()
+ local ns, nl = 0, 0
+ for unic, glyph in next, descriptions do
+ local index = glyph.index
+ local name = glyph.name
+ if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
+ local unicode = lumunic and lumunic[name] or unicodevector[name]
+ if unicode then
+ originals[index] = unicode
+ tounicode[index] = tounicode16(unicode,name)
+ ns = ns + 1
+ end
+ -- cidmap heuristics, beware, there is no guarantee for a match unless
+ -- the chain resolves
+ if (not unicode) and usedmap then
+ local foundindex = lpegmatch(oparser,name)
+ if foundindex then
+ unicode = cidcodes[foundindex] -- name to number
+ if unicode then
+ originals[index] = unicode
+ tounicode[index] = tounicode16(unicode,name)
+ ns = ns + 1
+ else
+ local reference = cidnames[foundindex] -- number to name
+ if reference then
+ local foundindex = lpegmatch(oparser,reference)
+ if foundindex then
+ unicode = cidcodes[foundindex]
+ if unicode then
+ originals[index] = unicode
+ tounicode[index] = tounicode16(unicode,name)
+ ns = ns + 1
+ end
+ end
+ if not unicode or unicode == "" then
+ local foundcodes, multiple = lpegmatch(uparser,reference)
+ if foundcodes then
+ originals[index] = foundcodes
+ if multiple then
+ tounicode[index] = tounicode16sequence(foundcodes)
+ nl = nl + 1
+ unicode = true
+ else
+ tounicode[index] = tounicode16(foundcodes,name)
+ ns = ns + 1
+ unicode = foundcodes
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+ -- a.whatever or a_b_c.whatever or a_b_c (no numbers)
+ if not unicode or unicode == "" then
+ local split = lpegmatch(ligsplitter,name)
+ local nplit = split and #split or 0
+ if nplit >= 2 then
+ local t, n = { }, 0
+ for l=1,nplit do
+ local base = split[l]
+ local u = unicodes[base] or unicodevector[base]
+ if not u then
+ break
+ elseif type(u) == "table" then
+ n = n + 1
+ t[n] = u[1]
+ else
+ n = n + 1
+ t[n] = u
+ end
+ end
+ if n == 0 then -- done then
+ -- nothing
+ elseif n == 1 then
+ originals[index] = t[1]
+ tounicode[index] = tounicode16(t[1],name)
+ else
+ originals[index] = t
+ tounicode[index] = tounicode16sequence(t)
+ end
+ nl = nl + 1
+ unicode = true
+ else
+ -- skip: already checked and we don't want privates here
+ end
+ end
+ -- last resort (we might need to catch private here as well)
+ if not unicode or unicode == "" then
+ local foundcodes, multiple = lpegmatch(uparser,name)
+ if foundcodes then
+ if multiple then
+ originals[index] = foundcodes
+ tounicode[index] = tounicode16sequence(foundcodes,name)
+ nl = nl + 1
+ unicode = true
+ else
+ originals[index] = foundcodes
+ tounicode[index] = tounicode16(foundcodes,name)
+ ns = ns + 1
+ unicode = foundcodes
+ end
+ end
+ end
+ -- if not unicode then
+ -- originals[index] = 0xFFFD
+ -- tounicode[index] = "FFFD"
+ -- end
+ end
+ end
+ if trace_mapping then
+ for unic, glyph in table.sortedhash(descriptions) do
+ local name = glyph.name
+ local index = glyph.index
+ local toun = tounicode[index]
+ if toun then
+ report_fonts("internal slot %U, name %a, unicode %U, tounicode %a",index,name,unic,toun)
+ else
+ report_fonts("internal slot %U, name %a, unicode %U",index,name,unic)
+ end
+ end
+ end
+ if trace_loading and (ns > 0 or nl > 0) then
+ report_fonts("%s tounicode entries added, ligatures %s",nl+ns,ns)
+ end
+end
|