diff options
Diffstat (limited to 'tex/context/base/font-map.lua')
-rw-r--r-- | tex/context/base/font-map.lua | 658 |
1 files changed, 329 insertions, 329 deletions
diff --git a/tex/context/base/font-map.lua b/tex/context/base/font-map.lua index 864b43c24..6988b9b9e 100644 --- a/tex/context/base/font-map.lua +++ b/tex/context/base/font-map.lua @@ -1,329 +1,329 @@ -if not modules then modules = { } end modules ['font-map'] = {
- version = 1.001,
- comment = "companion to font-ini.mkiv",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
-local tonumber = tonumber
-
-local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower
-local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match
-local utfbyte = utf.byte
-local floor = math.floor
-
-- Tracing switches. Each one is registered with trackers.register; the
-- callback stores the value it is given in the matching local so that the
-- rest of this module can test it cheaply.
local trace_loading = false  trackers.register("fonts.loading", function(v) trace_loading = v end)
-- The callback has to assign the local declared on this very line. It used
-- to assign an undeclared (global) trace_unimapping, which left trace_mapping
-- permanently false and made the mapping reports unreachable.
local trace_mapping = false  trackers.register("fonts.mapping", function(v) trace_mapping = v end)

local report_fonts = logs.reporter("fonts","loading") -- not otf only

-- Reuse an already present fonts.mappings table, otherwise start a new one.
local fonts    = fonts or { }
local mappings = fonts.mappings or { }
fonts.mappings = mappings
-
---[[ldx--
-<p>Eventually this code will disappear because map files are kind
-of obsolete. Some code may move to runtime or auxiliary modules.</p>
-<p>The name-to-unicode related code will stay, of course.</p>
---ldx]]--
-
-- Loads the "lum" table that belongs to a font file.
--
-- The base name of filename gets the suffix "lum" and is looked up with
-- resolvers.findfile(...,"map"). When that yields an existing file it is
-- executed with dofile.
--
-- filename : name (or path) of the font file
-- returns  : the value returned by the lum file and the path it was loaded
--            from, or nothing when no such file is found
local function loadlumtable(filename) -- will move to font goodies
    local lumname = file.replacesuffix(file.basename(filename),"lum")
    local lumfile = resolvers.findfile(lumname,"map") or ""
    if lumfile ~= "" and lfs.isfile(lumfile) then
        if trace_loading or trace_mapping then
            report_fonts("loading map table %a",lumfile)
        end
        -- keep the result local: assigning an undeclared name here would
        -- create (and overwrite) a global called lumunic
        local lumunic = dofile(lumfile)
        return lumunic, lumfile
    end
end
-
-- Grammar for glyph names that carry their own code points. Every
-- alternative yields two values: the code (a number, or a table of numbers)
-- and a boolean that is true when the first value is a table.
--
--   uniXXXX        one group of four uppercase hex digits, followed by a
--                  period or the end of the name : number, false
--   uniXXXXYYYY..  one or more groups of four     : table,  true
--   uXXXX..uXXXXXX four to six uppercase hex digits, followed by a period
--                  or the end of the name         : number, false
--   uXXXXXXYYYYYY  one or more groups of six      : table,  true
--   index<digits>  decimal number                 : number, false
--
-- The Adobe Glyph List specification allows four to six digits after "u";
-- requiring exactly six rejected common names such as "u1F600". The
-- six-digit group fallback is kept as it was.

local function fromhex(s)
    return tonumber(s,16)
end

local hex     = R("AF","09")
local hexfour = (hex*hex*hex*hex) / fromhex
local hexsix  = (hex*hex*hex*hex*hex*hex) / fromhex
local hexcode = (hex*hex*hex*hex*hex^-2) / fromhex -- four, five or six digits
local dec     = (R("09")^1) / tonumber
local period  = P(".")
local unicode = P("uni") * (hexfour * (period + P(-1)) * Cc(false) + Ct(hexfour^1) * Cc(true))
local ucode   = P("u")   * (hexcode * (period + P(-1)) * Cc(false) + Ct(hexsix ^1) * Cc(true))
local index   = P("index") * dec * Cc(false)

local parser  = unicode + ucode + index
-
-- Parsers built so far, keyed by their prefix string.
local parsers = { }

-- Returns a parser for glyph names.
--
-- Without a prefix (nil or an empty string) the generic parser is returned.
-- Otherwise the result matches the prefix, a period and a decimal number,
-- and yields that number followed by false. Each prefix is built once and
-- then served from the parsers table.
local function makenameparser(str)
    if not str or str == "" then
        return parser
    end
    local known = parsers[str]
    if known then
        return known
    end
    local built = P(str) * period * dec * Cc(false)
    parsers[str] = built
    return built
end
-
--- local parser = makenameparser("Japan1")
--- local parser = makenameparser()
--- local function test(str)
--- local b, a = lpegmatch(parser,str)
--- print((a and table.serialize(b)) or b)
--- end
--- test("a.sc")
--- test("a")
--- test("uni1234")
--- test("uni1234.xx")
--- test("uni12349876")
--- test("u123400987600")
--- test("index1234")
--- test("Japan1.123")
-
-- Formats one code point as uppercase hexadecimal UTF-16BE: four digits up
-- to 0xFFFF, a surrogate pair (eight digits) up to 0x10FFFF. Returns nil for
-- anything larger, as that cannot be expressed in UTF-16.
local function utf16hex(unicode)
    if unicode < 0x10000 then
        return format("%04X",unicode)
    elseif unicode < 0x110000 then
        -- a surrogate pair encodes the offset into the supplementary planes:
        -- the upper ten bits go on top of 0xD800, the lower ten on 0xDC00
        local offset = unicode - 0x10000
        return format("%04X%04X",floor(offset/0x400)+0xD800,offset%0x400+0xDC00)
    end
end

-- Converts a code point into its tounicode string.
--
-- unicode : code point (number)
-- name    : glyph name, only used in the report
-- returns : hex string, or nil (after a report) when unicode is out of range
local function tounicode16(unicode,name)
    local s = utf16hex(unicode)
    if not s then
        report_fonts("can't convert %a in %a into tounicode",unicode,name)
    end
    return s
end

-- Converts a list of code points into one concatenated tounicode string.
--
-- unicodes : array of code points
-- name     : glyph name, only used in the report
-- returns  : hex string; entries that are out of range are reported and
--            left out (they used to leave a hole that made concat fail)
local function tounicode16sequence(unicodes,name)
    local t, n = { }, 0
    for l=1,#unicodes do
        local unicode = unicodes[l]
        local s = utf16hex(unicode)
        if s then
            n = n + 1
            t[n] = s
        else
            report_fonts ("can't convert %a in %a into tounicode",unicode,name)
        end
    end
    return concat(t)
end

-- Converts a tounicode string back into a code point.
--
-- str     : four hex digits, or eight hex digits for a pair
-- returns : code point, or nil when an eight digit string cannot be parsed
local function fromunicode16(str)
    if #str == 4 then
        return tonumber(str,16)
    end
    local l, r = match(str,"(....)(....)")
    local hi = l and tonumber(l,16)
    local lo = r and tonumber(r,16)
    if not (hi and lo) then
        return nil
    end
    if hi >= 0xD800 and hi <= 0xDBFF and lo >= 0xDC00 and lo <= 0xDFFF then
        return (hi - 0xD800) * 0x400 + (lo - 0xDC00) + 0x10000
    end
    -- strings written by the previous encoder carry floor(unicode/1024) in
    -- the first half instead of a high surrogate; keep decoding those
    return hi * 0x400 + lo - 0xDC00
end
-
--- Slightly slower:
---
--- local p = C(4) * (C(4)^-1) / function(l,r)
--- if r then
--- return (tonumber(l,16))*0x400 + tonumber(r,16) - 0xDC00
--- else
--- return tonumber(l,16)
--- end
--- end
---
--- local function fromunicode16(str)
--- return lpegmatch(p,str)
--- end
-
--- This is quite a bit faster but at the cost of some memory but if we
--- do this we will also use it elsewhere so let's not follow this route
--- now. I might use this method in the plain variant (no caching there)
--- but then I need a flag that distinguishes between code branches.
---
--- local cache = { }
---
--- function mappings.tounicode16(unicode)
--- local s = cache[unicode]
--- if not s then
--- if unicode < 0x10000 then
--- s = format("%04X",unicode)
--- else
--- s = format("%04X%04X",unicode/0x400+0xD800,unicode%0x400+0xDC00)
--- end
--- cache[unicode] = s
--- end
--- return s
--- end
-
-- Publish the local helpers through the mappings table.
mappings.fromunicode16       = fromunicode16
mappings.tounicode16sequence = tounicode16sequence
mappings.tounicode16         = tounicode16
mappings.makenameparser      = makenameparser
mappings.loadlumtable        = loadlumtable
-
-- Splits a glyph name at every "_" or "." and collects the parts in a
-- table, so "such_so_more.that" gives { "such", "so", "more", "that" }.
local separator   = S("._")
local other       = C((P(1) - separator)^1)
local ligsplitter = Ct(other * (separator * other)^0)
-
---~ print(table.serialize(lpegmatch(ligsplitter,"this")))
---~ print(table.serialize(lpegmatch(ligsplitter,"this.that")))
---~ print(table.serialize(lpegmatch(ligsplitter,"japan1.123")))
---~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more")))
---~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more.that")))
-
-- Builds the tounicode information of a font.
--
-- Two tables are created and stored in data.resources: originals (glyph
-- index to code point, or to a table of code points) and tounicode (glyph
-- index to hex string). Only named glyphs are looked at whose slot is -1,
-- at or above fonts.constructors.privateoffset, in 0xE000-0xF8FF, or equal
-- to 0xFFFE or 0xFFFF. For such a glyph these steps are tried until one
-- resolves the name:
--
--   1. a direct lookup of the name in the agl vector
--   2. when the font has cid info: the cid map
--   3. the name split at "_" and ".", each part looked up by name
--   4. the name parsed as uniXXXX, uXXXXXX or index<n>
--
-- data     : font data table with resources, properties and descriptions
-- filename : font file name, only used by the disabled lum table branch
-- returns  : nothing; nothing is changed when resources.unicodes is absent
function mappings.addtounicode(data,filename)
    local resources    = data.resources
    local properties   = data.properties
    local descriptions = data.descriptions
    local unicodes     = resources.unicodes
    if not unicodes then
        return
    end
    -- we need to move this code
    unicodes['space']  = unicodes['space']  or 32
    unicodes['hyphen'] = unicodes['hyphen'] or 45
    unicodes['zwj']    = unicodes['zwj']    or 0x200D
    unicodes['zwnj']   = unicodes['zwnj']   or 0x200C
    -- the tounicode mapping is sparse and only needed for alternatives
    local private       = fonts.constructors.privateoffset
    local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context
    local tounicode     = { }
    local originals     = { }
    resources.tounicode = tounicode
    resources.originals = originals
    local lumunic, uparser, oparser
    local cidinfo, cidnames, cidcodes, usedmap
    if false then -- will become an option
        lumunic = loadlumtable(filename)
        lumunic = lumunic and lumunic.tounicode
    end
    --
    cidinfo = properties.cidinfo
    usedmap = cidinfo and fonts.cid.getmap(cidinfo)
    --
    if usedmap then
        oparser  = makenameparser(cidinfo.ordering)
        cidnames = usedmap.names
        cidcodes = usedmap.unicodes
    end
    uparser = makenameparser()
    -- ns counts glyphs resolved to a single code point, nl those resolved to
    -- a sequence
    local ns, nl = 0, 0
    local function setsingle(index,code,name)
        originals[index] = code
        tounicode[index] = tounicode16(code,name)
        ns = ns + 1
    end
    local function setsequence(index,codes,name)
        originals[index] = codes
        tounicode[index] = tounicode16sequence(codes,name)
        nl = nl + 1
    end
    for unic, glyph in next, descriptions do
        local index = glyph.index
        local name  = glyph.name
        -- a glyph without a name cannot be resolved by any of the steps
        -- below (and would make the name parsers fail), so it is skipped
        if name and (unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF) then
            local unicode = lumunic and lumunic[name] or unicodevector[name]
            if unicode then
                setsingle(index,unicode,name)
            end
            -- cidmap heuristics, beware, there is no guarantee for a match unless
            -- the chain resolves
            if (not unicode) and usedmap then
                local foundindex = lpegmatch(oparser,name)
                if foundindex then
                    unicode = cidcodes[foundindex] -- name to number
                    if unicode then
                        setsingle(index,unicode,name)
                    else
                        local reference = cidnames[foundindex] -- number to name
                        if reference then
                            local foundindex = lpegmatch(oparser,reference)
                            if foundindex then
                                unicode = cidcodes[foundindex]
                                if unicode then
                                    setsingle(index,unicode,name)
                                end
                            end
                            if not unicode or unicode == "" then
                                local foundcodes, multiple = lpegmatch(uparser,reference)
                                if foundcodes then
                                    if multiple then
                                        setsequence(index,foundcodes,name)
                                        unicode = true
                                    else
                                        setsingle(index,foundcodes,name)
                                        unicode = foundcodes
                                    end
                                end
                            end
                        end
                    end
                end
            end
            -- a.whatever or a_b_c.whatever or a_b_c (no numbers)
            if not unicode or unicode == "" then
                local split = lpegmatch(ligsplitter,name)
                local nplit = split and #split or 0
                if nplit >= 2 then
                    local t, n = { }, 0
                    for l=1,nplit do
                        local base = split[l]
                        local u = unicodes[base] or unicodevector[base]
                        if not u then
                            break
                        elseif type(u) == "table" then
                            n = n + 1
                            t[n] = u[1]
                        else
                            n = n + 1
                            t[n] = u
                        end
                    end
                    -- only a name of which at least the first part is known
                    -- counts as resolved; marking it resolved with n == 0
                    -- kept names like uni1234.alt away from the last resort
                    if n == 1 then
                        setsingle(index,t[1],name)
                        unicode = true
                    elseif n > 1 then
                        setsequence(index,t,name)
                        unicode = true
                    end
                else
                    -- skip: already checked and we don't want privates here
                end
            end
            -- last resort (we might need to catch private here as well)
            if not unicode or unicode == "" then
                local foundcodes, multiple = lpegmatch(uparser,name)
                if foundcodes then
                    if multiple then
                        setsequence(index,foundcodes,name)
                        unicode = true
                    else
                        setsingle(index,foundcodes,name)
                        unicode = foundcodes
                    end
                end
            end
            -- if not unicode then
            --     originals[index] = 0xFFFD
            --     tounicode[index] = "FFFD"
            -- end
        end
    end
    if trace_mapping then
        for unic, glyph in table.sortedhash(descriptions) do
            local name  = glyph.name
            local index = glyph.index
            local toun  = tounicode[index]
            if toun then
                report_fonts("internal slot %U, name %a, unicode %U, tounicode %a",index,name,unic,toun)
            else
                report_fonts("internal slot %U, name %a, unicode %U",index,name,unic)
            end
        end
    end
    if trace_loading and (ns > 0 or nl > 0) then
        -- the second number is the sequence (ligature) count, so nl
        report_fonts("%s tounicode entries added, ligatures %s",nl+ns,nl)
    end
end
+if not modules then modules = { } end modules ['font-map'] = { + version = 1.001, + comment = "companion to font-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local tonumber = tonumber + +local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower +local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match +local utfbyte = utf.byte +local floor = math.floor + +local trace_loading = false trackers.register("fonts.loading", function(v) trace_loading = v end) +local trace_mapping = false trackers.register("fonts.mapping", function(v) trace_unimapping = v end) + +local report_fonts = logs.reporter("fonts","loading") -- not otf only + +local fonts = fonts or { } +local mappings = fonts.mappings or { } +fonts.mappings = mappings + +--[[ldx-- +<p>Eventually this code will disappear because map files are kind +of obsolete. 
Some code may move to runtime or auxiliary modules.</p> +<p>The name to unciode related code will stay of course.</p> +--ldx]]-- + +local function loadlumtable(filename) -- will move to font goodies + local lumname = file.replacesuffix(file.basename(filename),"lum") + local lumfile = resolvers.findfile(lumname,"map") or "" + if lumfile ~= "" and lfs.isfile(lumfile) then + if trace_loading or trace_mapping then + report_fonts("loading map table %a",lumfile) + end + lumunic = dofile(lumfile) + return lumunic, lumfile + end +end + +local hex = R("AF","09") +local hexfour = (hex*hex*hex*hex) / function(s) return tonumber(s,16) end +local hexsix = (hex*hex*hex*hex*hex*hex) / function(s) return tonumber(s,16) end +local dec = (R("09")^1) / tonumber +local period = P(".") +local unicode = P("uni") * (hexfour * (period + P(-1)) * Cc(false) + Ct(hexfour^1) * Cc(true)) +local ucode = P("u") * (hexsix * (period + P(-1)) * Cc(false) + Ct(hexsix ^1) * Cc(true)) +local index = P("index") * dec * Cc(false) + +local parser = unicode + ucode + index + +local parsers = { } + +local function makenameparser(str) + if not str or str == "" then + return parser + else + local p = parsers[str] + if not p then + p = P(str) * period * dec * Cc(false) + parsers[str] = p + end + return p + end +end + +-- local parser = makenameparser("Japan1") +-- local parser = makenameparser() +-- local function test(str) +-- local b, a = lpegmatch(parser,str) +-- print((a and table.serialize(b)) or b) +-- end +-- test("a.sc") +-- test("a") +-- test("uni1234") +-- test("uni1234.xx") +-- test("uni12349876") +-- test("u123400987600") +-- test("index1234") +-- test("Japan1.123") + +local function tounicode16(unicode,name) + if unicode < 0x10000 then + return format("%04X",unicode) + elseif unicode < 0x1FFFFFFFFF then + return format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00) + else + report_fonts("can't convert %a in %a into tounicode",unicode,name) + end +end + +local function 
tounicode16sequence(unicodes,name) + local t = { } + for l=1,#unicodes do + local unicode = unicodes[l] + if unicode < 0x10000 then + t[l] = format("%04X",unicode) + elseif unicode < 0x1FFFFFFFFF then + t[l] = format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00) + else + report_fonts ("can't convert %a in %a into tounicode",unicode,name) + end + end + return concat(t) +end + +local function fromunicode16(str) + if #str == 4 then + return tonumber(str,16) + else + local l, r = match(str,"(....)(....)") + return (tonumber(l,16))*0x400 + tonumber(r,16) - 0xDC00 + end +end + +-- Slightly slower: +-- +-- local p = C(4) * (C(4)^-1) / function(l,r) +-- if r then +-- return (tonumber(l,16))*0x400 + tonumber(r,16) - 0xDC00 +-- else +-- return tonumber(l,16) +-- end +-- end +-- +-- local function fromunicode16(str) +-- return lpegmatch(p,str) +-- end + +-- This is quite a bit faster but at the cost of some memory but if we +-- do this we will also use it elsewhere so let's not follow this route +-- now. I might use this method in the plain variant (no caching there) +-- but then I need a flag that distinguishes between code branches. 
+-- +-- local cache = { } +-- +-- function mappings.tounicode16(unicode) +-- local s = cache[unicode] +-- if not s then +-- if unicode < 0x10000 then +-- s = format("%04X",unicode) +-- else +-- s = format("%04X%04X",unicode/0x400+0xD800,unicode%0x400+0xDC00) +-- end +-- cache[unicode] = s +-- end +-- return s +-- end + +mappings.loadlumtable = loadlumtable +mappings.makenameparser = makenameparser +mappings.tounicode16 = tounicode16 +mappings.tounicode16sequence = tounicode16sequence +mappings.fromunicode16 = fromunicode16 + +local separator = S("_.") +local other = C((1 - separator)^1) +local ligsplitter = Ct(other * (separator * other)^0) + +--~ print(table.serialize(lpegmatch(ligsplitter,"this"))) +--~ print(table.serialize(lpegmatch(ligsplitter,"this.that"))) +--~ print(table.serialize(lpegmatch(ligsplitter,"japan1.123"))) +--~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more"))) +--~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more.that"))) + +function mappings.addtounicode(data,filename) + local resources = data.resources + local properties = data.properties + local descriptions = data.descriptions + local unicodes = resources.unicodes + if not unicodes then + return + end + -- we need to move this code + unicodes['space'] = unicodes['space'] or 32 + unicodes['hyphen'] = unicodes['hyphen'] or 45 + unicodes['zwj'] = unicodes['zwj'] or 0x200D + unicodes['zwnj'] = unicodes['zwnj'] or 0x200C + -- the tounicode mapping is sparse and only needed for alternatives + local private = fonts.constructors.privateoffset + local unknown = format("%04X",utfbyte("?")) + local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context + local tounicode = { } + local originals = { } + resources.tounicode = tounicode + resources.originals = originals + local lumunic, uparser, oparser + local cidinfo, cidnames, cidcodes, usedmap + if false then -- will become an option + lumunic = loadlumtable(filename) + lumunic = lumunic and lumunic.tounicode + 
end + -- + cidinfo = properties.cidinfo + usedmap = cidinfo and fonts.cid.getmap(cidinfo) + -- + if usedmap then + oparser = usedmap and makenameparser(cidinfo.ordering) + cidnames = usedmap.names + cidcodes = usedmap.unicodes + end + uparser = makenameparser() + local ns, nl = 0, 0 + for unic, glyph in next, descriptions do + local index = glyph.index + local name = glyph.name + if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then + local unicode = lumunic and lumunic[name] or unicodevector[name] + if unicode then + originals[index] = unicode + tounicode[index] = tounicode16(unicode,name) + ns = ns + 1 + end + -- cidmap heuristics, beware, there is no guarantee for a match unless + -- the chain resolves + if (not unicode) and usedmap then + local foundindex = lpegmatch(oparser,name) + if foundindex then + unicode = cidcodes[foundindex] -- name to number + if unicode then + originals[index] = unicode + tounicode[index] = tounicode16(unicode,name) + ns = ns + 1 + else + local reference = cidnames[foundindex] -- number to name + if reference then + local foundindex = lpegmatch(oparser,reference) + if foundindex then + unicode = cidcodes[foundindex] + if unicode then + originals[index] = unicode + tounicode[index] = tounicode16(unicode,name) + ns = ns + 1 + end + end + if not unicode or unicode == "" then + local foundcodes, multiple = lpegmatch(uparser,reference) + if foundcodes then + originals[index] = foundcodes + if multiple then + tounicode[index] = tounicode16sequence(foundcodes) + nl = nl + 1 + unicode = true + else + tounicode[index] = tounicode16(foundcodes,name) + ns = ns + 1 + unicode = foundcodes + end + end + end + end + end + end + end + -- a.whatever or a_b_c.whatever or a_b_c (no numbers) + if not unicode or unicode == "" then + local split = lpegmatch(ligsplitter,name) + local nplit = split and #split or 0 + if nplit >= 2 then + local t, n = { }, 0 + for l=1,nplit do + local base = split[l] 
+ local u = unicodes[base] or unicodevector[base] + if not u then + break + elseif type(u) == "table" then + n = n + 1 + t[n] = u[1] + else + n = n + 1 + t[n] = u + end + end + if n == 0 then -- done then + -- nothing + elseif n == 1 then + originals[index] = t[1] + tounicode[index] = tounicode16(t[1],name) + else + originals[index] = t + tounicode[index] = tounicode16sequence(t) + end + nl = nl + 1 + unicode = true + else + -- skip: already checked and we don't want privates here + end + end + -- last resort (we might need to catch private here as well) + if not unicode or unicode == "" then + local foundcodes, multiple = lpegmatch(uparser,name) + if foundcodes then + if multiple then + originals[index] = foundcodes + tounicode[index] = tounicode16sequence(foundcodes,name) + nl = nl + 1 + unicode = true + else + originals[index] = foundcodes + tounicode[index] = tounicode16(foundcodes,name) + ns = ns + 1 + unicode = foundcodes + end + end + end + -- if not unicode then + -- originals[index] = 0xFFFD + -- tounicode[index] = "FFFD" + -- end + end + end + if trace_mapping then + for unic, glyph in table.sortedhash(descriptions) do + local name = glyph.name + local index = glyph.index + local toun = tounicode[index] + if toun then + report_fonts("internal slot %U, name %a, unicode %U, tounicode %a",index,name,unic,toun) + else + report_fonts("internal slot %U, name %a, unicode %U",index,name,unic) + end + end + end + if trace_loading and (ns > 0 or nl > 0) then + report_fonts("%s tounicode entries added, ligatures %s",nl+ns,ns) + end +end |