From a2ea45941a60348f60a312abf8fb01b086dc41a6 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Thu, 7 Apr 2016 23:58:55 +0200 Subject: [*] update dates, version --- src/luaotfload-database.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/luaotfload-database.lua') diff --git a/src/luaotfload-database.lua b/src/luaotfload-database.lua index 1d5dfd8..0adc262 100644 --- a/src/luaotfload-database.lua +++ b/src/luaotfload-database.lua @@ -1,5 +1,5 @@ if not modules then modules = { } end modules ['luaotfload-database'] = { - version = "2.6", + version = "2.7", comment = "companion to luaotfload-main.lua", author = "Khaled Hosny, Elie Roux, Philipp Gesang", copyright = "Luaotfload Development Team", -- cgit v1.2.3 From a995e0f6a8cbf6d56a99f79a7107508f4d4b038c Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 8 Apr 2016 23:26:02 +0200 Subject: [db] ditch our gzip wrappers We again depend on the full Lualibs set for some time so our wrappers are irrelevant as we can just use the similar ones from there. --- src/luaotfload-database.lua | 45 ++++----------------------------------------- 1 file changed, 4 insertions(+), 41 deletions(-) (limited to 'src/luaotfload-database.lua') diff --git a/src/luaotfload-database.lua b/src/luaotfload-database.lua index 0adc262..f5aacbc 100644 --- a/src/luaotfload-database.lua +++ b/src/luaotfload-database.lua @@ -57,7 +57,8 @@ local fontloaderinfo = fontloader.info local fontloaderclose = fontloader.close local fontloaderopen = fontloader.open ----- fontloaderto_table = fontloader.to_table -local gzipopen = gzip.open +local gzipload = gzip.load +local gzipsave = gzip.save local iolines = io.lines local ioopen = io.open local iopopen = io.popen @@ -361,44 +362,6 @@ local initialize_namedata = function (formats, created) } end ---[[doc-- - - Since Luaotfload does not depend on the lualibs anymore we - have to put our own small wrappers for the gzip library in - place.
- - load_gzipped -- Read and decompress and entire gzipped file. - Returns the uncompressed content as a string. - ---doc]]-- - -local load_gzipped = function (filename) - local gh = gzipopen (filename,"rb") - if gh then - local data = gh:read "*all" - gh:close () - return data - end -end - ---[[doc-- - - save_gzipped -- Compress and write a string to file. The return - value is the number of bytes written. Zlib parameters are: best - compression and default strategy. - ---doc]]-- - -local save_gzipped = function (filename, data) - local gh = gzipopen (filename, "wb9") - if gh then - gh:write (data) - local bytes = gh:seek () - gh:close () - return bytes - end -end - --- When loading a lua file we try its binary complement first, which --- is assumed to be located at an identical path, carrying the suffix --- .luc. @@ -427,7 +390,7 @@ local load_lua_file = function (path) if not code then --- probe gzipped file foundname = filereplacesuffix (path, "lua.gz") - local chunk = load_gzipped (foundname) + local chunk = gzipload (foundname) if chunk then code = load (chunk, "t") end @@ -3298,7 +3261,7 @@ save_names = function (currentnames) local gzname = luaname .. ".gz" if config.luaotfload.db.compress then local serialized = tableserialize (currentnames, true) - save_gzipped (gzname, serialized) + gzipsave (gzname, serialized) caches.compile (currentnames, "", lucname) else tabletofile (luaname, currentnames, true) -- cgit v1.2.3 From a02f276d8fd95bd0c644ec89f7822006baacf4b3 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sat, 9 Apr 2016 18:29:31 +0200 Subject: [tool,db] kill off fontforge This has been coming for some time: Upstream now provides full Opentype reader capabilities. This allows Luatex to drop those horrible fontforge libraries. Since the API is pretty similar, for Luaotfload it means little change and a decent speed gain. Though we still need to investigate whether the result is equivalent or at least acceptable. 
--- src/luaotfload-database.lua | 51 +++++++++++---------------------------------- 1 file changed, 12 insertions(+), 39 deletions(-) (limited to 'src/luaotfload-database.lua') diff --git a/src/luaotfload-database.lua b/src/luaotfload-database.lua index f5aacbc..58ad2ab 100644 --- a/src/luaotfload-database.lua +++ b/src/luaotfload-database.lua @@ -53,10 +53,13 @@ local require = require local tonumber = tonumber local unpack = table.unpack -local fontloaderinfo = fontloader.info -local fontloaderclose = fontloader.close -local fontloaderopen = fontloader.open ------ fontloaderto_table = fontloader.to_table +local fonts = fonts or { } +local fontshandlers = fonts.handlers or { } +local otfhandler = fonts.handlers.otf or { } +fonts.handlers = fontshandlers + +local otfreadersgetinfo = otfhandler.readers.getinfo + local gzipload = gzip.load local gzipsave = gzip.save local iolines = io.lines @@ -1224,24 +1227,8 @@ find_closest = function (name, limit) return false end --- find_closest() ---[[doc-- - - load_font_file -- Safely open a font file. See - - regarding the omission of ``fontloader.close()``. - - TODO -- check if fontloader.info() is ready for prime in 0.78+ - -- fields /tables needed: - -- names - -- postscriptname - -- validation_state - -- .. - ---doc]]-- - local load_font_file = function (filename, subfont) - local rawfont, _msg = fontloaderopen (filename, subfont) - --local rawfont, _msg = fontloaderinfo (filename, subfont) + local rawfont, _msg = otfreadersgetinfo (filename, subfont) if not rawfont then logreport ("log", 1, "db", "ERROR: failed to open %s.", filename) return @@ -1293,9 +1280,7 @@ local get_english_names = function (metadata) end --[[-- - In case of broken PS names we set some dummies. However, we cannot - directly modify the font data as returned by fontloader.open() because - it is a userdata object. + In case of broken PS names we set some dummies. 
For this reason we copy what is necessary whilst keeping the table structure the same as in the tfmdata. @@ -1471,11 +1456,7 @@ ot_fullinfo = function (filename, return nil end - local rawinfo = get_raw_info (metadata, basename) - --- Closing the file manually is a tad faster and more memory - --- efficient than having it closed by the gc - fontloaderclose (metadata) - + local rawinfo = get_raw_info (metadata, basename) local english_names = get_english_names (rawinfo) local namedata = organize_namedata (rawinfo, english_names, @@ -1629,15 +1610,7 @@ local insert_fullinfo = function (fullname, targetentrystatus, info) - local subfont - if n_font ~= false then - subfont = n_font - 1 - else - subfont = false - n_font = 1 - end - - local fullinfo = loader (fullname, subfont, + local fullinfo = loader (fullname, n_font, location, basename, format, info) @@ -1722,7 +1695,7 @@ local read_font_names = function (fullname, --- 4) get basic info, abort if fontloader can’t read it - local info = fontloaderinfo (fullname) + local info = otfreadersgetinfo (fullname) if not info then logreport ("log", 1, "db", -- cgit v1.2.3 From 2a802efe6456f8cfeda53b6df1201413e199812f Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sat, 9 Apr 2016 22:20:10 +0200 Subject: [conf,db] allow switching back to fontforge There are some non-negligible differences in the reader output, especially concerning font names. Until this is sorted out we need a fast way to switch back to the old code for reference. 
--- src/luaotfload-database.lua | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'src/luaotfload-database.lua') diff --git a/src/luaotfload-database.lua b/src/luaotfload-database.lua index 58ad2ab..7c5e69b 100644 --- a/src/luaotfload-database.lua +++ b/src/luaotfload-database.lua @@ -58,7 +58,8 @@ local fontshandlers = fonts.handlers or { } local otfhandler = fonts.handlers.otf or { } fonts.handlers = fontshandlers -local otfreadersgetinfo = otfhandler.readers.getinfo +local read_font_file = otfhandler.readers.getinfo +local read_font_info = read_font_file local gzipload = gzip.load local gzipsave = gzip.save @@ -1228,7 +1229,7 @@ find_closest = function (name, limit) end --- find_closest() local load_font_file = function (filename, subfont) - local rawfont, _msg = otfreadersgetinfo (filename, subfont) + local rawfont, _msg = read_font_file (filename, subfont) if not rawfont then logreport ("log", 1, "db", "ERROR: failed to open %s.", filename) return @@ -1695,7 +1696,7 @@ local read_font_names = function (fullname, --- 4) get basic info, abort if fontloader can’t read it - local info = otfreadersgetinfo (fullname) + local info = read_font_file (fullname) if not info then logreport ("log", 1, "db", @@ -3419,6 +3420,17 @@ local show_cache = function ( ) return true end +local use_fontforge = function (val) + if val == true then + local fontloader = fontloader + read_font_info = fontloader.info + read_font_file = fontloader.open + else + read_font_file = otfhandler.readers.getinfo + read_font_info = read_font_file + end +end + ----------------------------------------------------------------------- --- export functionality to the namespace “fonts.names” ----------------------------------------------------------------------- @@ -3449,7 +3461,8 @@ local export = { erase_cache = erase_cache, show_cache = show_cache, find_closest = find_closest, - -- for testing purpose + --- transitionary + use_fontforge = use_fontforge, } return { 
-- cgit v1.2.3 From cc17dcae6df9247b32cbeeee59af677838d1b3f5 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 10 Apr 2016 11:01:50 +0200 Subject: [db] do not rely on __gc for font object lifetimes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The penalty for having font object closed automatically is huge: It takes around nine seconds more to rebuild the font database: 58 s with __gc, 49 s by closing manually. Even if it’s not the default, we reintroduce the code for closing fonts manually to avoid that situation. --- src/luaotfload-database.lua | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/luaotfload-database.lua') diff --git a/src/luaotfload-database.lua b/src/luaotfload-database.lua index 7c5e69b..7d0c86b 100644 --- a/src/luaotfload-database.lua +++ b/src/luaotfload-database.lua @@ -60,6 +60,7 @@ fonts.handlers = fontshandlers local read_font_file = otfhandler.readers.getinfo local read_font_info = read_font_file +local close_font_file = function () end local gzipload = gzip.load local gzipsave = gzip.save @@ -1457,6 +1458,8 @@ ot_fullinfo = function (filename, return nil end + close_font_file (metadata) --> FF only + local rawinfo = get_raw_info (metadata, basename) local english_names = get_english_names (rawinfo) local namedata = organize_namedata (rawinfo, @@ -3425,9 +3428,11 @@ local use_fontforge = function (val) local fontloader = fontloader read_font_info = fontloader.info read_font_file = fontloader.open + close_font_file = fontloader.close else read_font_file = otfhandler.readers.getinfo read_font_info = read_font_file + close_font_file = function () end end end -- cgit v1.2.3 From 783e94f44ea39df0ae877b289b999e8f13aaf3da Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 10 Apr 2016 12:26:13 +0200 Subject: [db] in ff mode, collect font object after access --- src/luaotfload-database.lua | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 
'src/luaotfload-database.lua') diff --git a/src/luaotfload-database.lua b/src/luaotfload-database.lua index 7d0c86b..6c02344 100644 --- a/src/luaotfload-database.lua +++ b/src/luaotfload-database.lua @@ -1266,7 +1266,6 @@ local get_english_names = function (metadata) local english_names if names then - --inspect(names) for _, raw_namedata in next, names do if raw_namedata.lang == "English (US)" then return raw_namedata.names @@ -1458,8 +1457,6 @@ ot_fullinfo = function (filename, return nil end - close_font_file (metadata) --> FF only - local rawinfo = get_raw_info (metadata, basename) local english_names = get_english_names (rawinfo) local namedata = organize_namedata (rawinfo, @@ -1481,6 +1478,7 @@ ot_fullinfo = function (filename, style = style, version = rawinfo.version, } + close_font_file (metadata) --> FF only return res end -- cgit v1.2.3 From adbde7d798a5b39d6524fe39396fe1cc8de71d09 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Tue, 12 Apr 2016 08:14:23 +0200 Subject: [db] adjust names extraction to Lua loader --- src/luaotfload-database.lua | 138 +++++++++++++++++++++++++------------------- 1 file changed, 79 insertions(+), 59 deletions(-) (limited to 'src/luaotfload-database.lua') diff --git a/src/luaotfload-database.lua b/src/luaotfload-database.lua index 6c02344..24c2745 100644 --- a/src/luaotfload-database.lua +++ b/src/luaotfload-database.lua @@ -58,9 +58,10 @@ local fontshandlers = fonts.handlers or { } local otfhandler = fonts.handlers.otf or { } fonts.handlers = fontshandlers -local read_font_file = otfhandler.readers.getinfo +local read_font_file = otfhandler.readers.loadfont local read_font_info = read_font_file local close_font_file = function () end +local get_english_names local gzipload = gzip.load local gzipsave = gzip.save @@ -297,7 +298,6 @@ This is a sketch of the luaotfload db: prefmodifiers : string; // sanitized preferred subfamily (names table 14) psname : string; // PostScript name size : (false | float * float * float); // if 
available, size info from the size table converted from decipoints - splainname : string; // sanitized version of the “plainname” field splitstyle : string; // style information obtained by splitting the full name at the last dash subfamily : string; // sanitized subfamily (names table 2) subfont : (int | bool); // integer if font is part of a TrueType collection ("ttc") @@ -943,7 +943,6 @@ local lookup_fontname = function (specification, name, style) local prefmodifiers = face.prefmodifiers local subfamily = face.subfamily if face.fontname == name - or face.splainname == name or face.fullname == name or face.psname == name then @@ -1240,10 +1239,10 @@ end --- rawdata -> (int * int * int | bool) -local get_size_info = function (metadata) - local design_size = metadata.design_size - local design_range_top = metadata.design_range_top - local design_range_bottom = metadata.design_range_bottom +local get_size_info = function (rawinfo) + local design_size = rawinfo.design_size + local design_range_top = rawinfo.design_range_top + local design_range_bottom = rawinfo.design_range_bottom local fallback_size = design_size ~= 0 and design_size or design_range_bottom ~= 0 and design_range_bottom @@ -1261,10 +1260,14 @@ local get_size_info = function (metadata) return false end -local get_english_names = function (metadata) - local names = metadata.names - local english_names +--[[doc-- + get_english_names_from_ff -- For legacy Fontforge-style names + tables. Extracted from the actual names table, not the font item + itself. +--doc]]-- +local get_english_names_from_ff = function (metadata) + local names = metadata.names if names then for _, raw_namedata in next, names do if raw_namedata.lang == "English (US)" then @@ -1280,6 +1283,33 @@ local get_english_names = function (metadata) fullname = metadata.fullname, } end +--[[doc-- + map_enlish_names -- Names-table for Lua fontloader objects. This + may vanish eventually once we ditch Fontforge completely. 
Only + subset of entries of that table are actually relevant so we’ll + stick to that part. +--doc]]-- + +local names_items = { + compatfull = "compatiblename", + fullname = "fullname", + postscriptname = "fontname", + preffamily = "familyname", + prefmodifiers = "subfamilyname", + family = "family", + subfamily = "subfamily", +} + +local map_english_names = function (metadata) + local nameinfo = { } + for ours, theirs in next, names_items do + nameinfo [ours] = metadata [theirs] + end + return nameinfo +end + +get_english_names = map_english_names + --[[-- In case of broken PS names we set some dummies. @@ -1287,10 +1317,8 @@ end structure the same as in the tfmdata. --]]-- local get_raw_info = function (metadata, basename) - local fullname local fontname = metadata.fontname local fullname = metadata.fullname - local psname local validation_state = metadata.validation_state if (validation_state and tablecontains (validation_state, "bad_ps_fontname")) @@ -1313,32 +1341,31 @@ local get_raw_info = function (metadata, basename) fullname = fullname, italicangle = metadata.italicangle, names = metadata.names, - pfminfo = metadata.pfminfo, units_per_em = metadata.units_per_em, version = metadata.version, - design_size = metadata.design_size, - design_range_top = metadata.design_range_top, - design_range_bottom = metadata.design_range_bottom, + design_size = metadata.design_size or metadata.designsize, + design_range_top = metadata.design_range_top or metadata.maxsize, + design_range_bottom = metadata.design_range_bottom or metadata.minsize, } end local organize_namedata = function (rawinfo, - english_names, + nametable, basename, info) - local default_name = english_names.compatfull - or english_names.fullname - or english_names.postscriptname + local default_name = nametable.compatfull + or nametable.fullname + or nametable.postscriptname or rawinfo.fullname or rawinfo.fontname or info.fullname or info.fontname - local default_family = english_names.preffamily - or 
english_names.family + local default_family = nametable.preffamily + or nametable.family or rawinfo.familyname or info.familyname --- local default_modifier = english_names.prefmodifiers --- or english_names.subfamily +-- local default_modifier = nametable.prefmodifiers +-- or nametable.subfamily local fontnames = { --- see --- https://developer.apple.com/fonts/TTRefMan/RM06/Chap6name.html @@ -1353,19 +1380,19 @@ local organize_namedata = function (rawinfo, --- non-abbreviated fashion, for most fonts at any rate. --- However, in some fonts (e.g. CMU) all three fields are --- identical. - fullname = --[[ 18 ]] english_names.compatfull - or --[[ 4 ]] english_names.fullname + fullname = --[[ 18 ]] nametable.compatfull + or --[[ 4 ]] nametable.fullname or default_name, --- we keep both the “preferred family” and the “family” --- values around since both are valid but can turn out --- quite differently, e.g. with Latin Modern: --- preffamily: “Latin Modern Sans”, --- family: “LM Sans 10” - preffamily = --[[ 16 ]] english_names.preffamilyname, - family = --[[ 1 ]] english_names.family or default_family, - prefmodifiers = --[[ 17 ]] english_names.prefmodifiers, - subfamily = --[[ 2 ]] english_names.subfamily, - psname = --[[ 6 ]] english_names.postscriptname, + preffamily = --[[ 16 ]] nametable.preffamilyname, + family = --[[ 1 ]] nametable.family or default_family, + prefmodifiers = --[[ 17 ]] nametable.prefmodifiers, + subfamily = --[[ 2 ]] nametable.subfamily or rawinfo.subfamilyname, + psname = --[[ 6 ]] nametable.postscriptname, }, metadata = { @@ -1417,22 +1444,19 @@ local split_fontname = function (fontname) end end -local organize_styledata = function (fontname, - metadata, - english_names, - info) - local pfminfo = metadata.pfminfo or { } - local names = metadata.names +local organize_styledata = function (metadata, rawinfo, info) + local pfminfo = metadata.pfminfo + local names = rawinfo.names return { --- see 
http://www.microsoft.com/typography/OTSPEC/features_pt.htm#size - size = get_size_info (metadata), - weight = pfminfo.weight or 400, - split = split_fontname (fontname), - width = pfminfo.width, + size = get_size_info (rawinfo), + weight = pfminfo and pfminfo.weight or metadata.pfmweight or 400, + split = split_fontname (rawinfo.fontname), + width = pfminfo and pfminfo.width or metadata.pfmwidth, italicangle = metadata.italicangle, --- this is for querying, see www.ntg.nl/maps/40/07.pdf for details - units_per_em = metadata.units_per_em, + units_per_em = metadata.units_per_em or metadata.units, version = metadata.version, } end @@ -1458,14 +1482,13 @@ ot_fullinfo = function (filename, end local rawinfo = get_raw_info (metadata, basename) - local english_names = get_english_names (rawinfo) + local nametable = get_english_names (metadata) local namedata = organize_namedata (rawinfo, - english_names, + nametable, basename, info) - local style = organize_styledata (namedata.fontname, + local style = organize_styledata (metadata, rawinfo, - english_names, info) local res = { @@ -1508,7 +1531,6 @@ t1_fullinfo = function (filename, _subfont, location, basename, format) sanitized = sanitize_fontnames ({ fontname = fontname, psname = fullname, - pfullname = fullname, metafamily = family, familyname = familyname, weight = metadata.weight, --- string identifier @@ -1535,13 +1557,12 @@ t1_fullinfo = function (filename, _subfont, location, basename, format) fontname = sanitized.fontname, familyname = sanitized.familyname, plainname = fullname, - splainname = sanitized.fullname, psname = sanitized.fontname, version = metadata.version, size = false, splitstyle = splitstyle, fontstyle_name = style ~= "" and style or weight, - weight = metadata.pfminfo.weight or 400, + weight = metadata.pfminfo and pfminfo.weight or 400, italicangle = italicangle, } end @@ -2553,7 +2574,6 @@ local pull_values = function (entry) entry.psname = english.psname entry.fontname = info.fontname or 
metadata.fontname entry.fullname = english.fullname or info.fullname - entry.splainname = metadata.fullname entry.prefmodifiers = english.prefmodifiers local metafamily = metadata.familyname local familyname = english.preffamily or english.family @@ -2588,8 +2608,6 @@ local add_family = function (name, subtable, modifier, entry) subtable [name] = familytable end - local size = entry.size - familytable [#familytable + 1] = { index = entry.index, modifier = modifier, @@ -3423,14 +3441,16 @@ end local use_fontforge = function (val) if val == true then - local fontloader = fontloader - read_font_info = fontloader.info - read_font_file = fontloader.open - close_font_file = fontloader.close + local fontloader = fontloader + read_font_info = fontloader.info + read_font_file = fontloader.open + close_font_file = fontloader.close + get_english_names = get_english_names_from_ff else - read_font_file = otfhandler.readers.getinfo - read_font_info = read_font_file - close_font_file = function () end + read_font_file = otfhandler.readers.getinfo + read_font_info = read_font_file + close_font_file = function () end + get_english_names = map_english_names end end -- cgit v1.2.3 From f0deb1226705f86d849e40a83c7a9b027f8a2957 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Wed, 13 Apr 2016 00:51:30 +0200 Subject: [db] add some stats regarding the new loader --- src/luaotfload-database.lua | 124 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 99 insertions(+), 25 deletions(-) (limited to 'src/luaotfload-database.lua') diff --git a/src/luaotfload-database.lua b/src/luaotfload-database.lua index 24c2745..0557c1a 100644 --- a/src/luaotfload-database.lua +++ b/src/luaotfload-database.lua @@ -8,31 +8,105 @@ if not modules then modules = { } end modules ['luaotfload-database'] = { --[[doc-- - Some statistics: - - a) TL 2012, mkluatexfontdb --force - b) v2.4, luaotfload-tool --update --force - c) v2.4, luaotfload-tool --update --force --formats=+afm,pfa,pfb - d) Context, 
mtxrun --script fonts --reload --force - - (Keep in mind that Context does index fewer fonts since it - considers only the contents of the minimals tree, not the - tex live one!) - - time (m:s) peak VmSize (kB) - a 1:19 386 018 - b 0:37 715 797 - c 2:27 1 017 674 - d 0:44 1 082 313 - - Most of the increase in memory consumption from version 1.x to 2.2+ - can be attributed to the move from single-pass to a multi-pass - approach to building the index: Information is first gathered from - all reachable fonts and only afterwards processed, classified and - discarded. Also, there is a good deal of additional stuff kept in - the database now: two extra tables for file names and font families - have been added, making font lookups more efficient while improving - maintainability of the code. + With version 2.7 we killed of the Fontforge libraries in favor of + the Lua implementation of the OT format reader. There were many + reasons to do this on top of the fact that FF won’t be around at + version 1.0 anymore: In addition to maintainability, memory safety + and general code hygiene, the new reader shows an amazing + performance: Scanning of the 3200 font files on my system takes + around 23 s now, as opposed to 74 s with the Fontforge libs. 
Memory + usage has improved drastically as well, as illustrated by these + profiles: + + GB + 1.324^ # + | # + | ::# + | : #:: + | @: #: + | @: #: + | @@@: #: + | @@@ @: #: + | @@ @ @ @: #: + | @ @@:@ @ @: #: : + | @@ : @ :@ @ @: #: : + | @@: ::@@ :@@ ::@ :@ @ @: #: : ::: + | @@ : :: @@ :@ : @ :@ @ @: #: : :: : + | @@ : ::: @@ :@ ::: @ :@ @ @: #: :: :: : + | @@@@ :: :::: @@ :@ : : @ :@ @ @: #: :: :::: :: + | :@ @@ :: :::: @@ :@ : :: : @ :@ @ @: #: :: :: :: :: + | @:@ @@ :: ::::: @@ :@ : :::: : @ :@ @ @: #: :::: :::: :: :: + | @@:@ @@ :: ::::::: @@ :@ : : :: : @ :@ @ @: #: ::: @: :: :: ::@ + | @@@:@ @@ :: :: ::::: @@ :@ ::: :: : @ :@ @ @: #: ::: @: :: :: ::@ + | @@@@@:@ @@ ::::::: ::::: @@ :@ ::: :: : @ :@ @ @: #: ::: ::@: :: :: ::@ + 0 +----------------------------------------------------------------------->GB + 0 16.29 + + This is the memory usage during a complete database rebuild with + the Fontforge libraries. The same action using the new + ``getinfo()`` method gives a different picture: + + MB + 43.37^ # + | @ @ @# + | @@ @ @ @# : + | @@@ : @: @ @ @# : + | @ @@@ : : @: @ @: :@# : + | @ @ @@@ : : @: @ @: :@# : + | @ : : :@ @@@:::: @::@ @: :@#:: : + | :: : @ : @ : :::@ @ :@@@:::::@::@ @:::@#:::: + | : @ : :: : :@:: :@: :::::@ @ :@@@:::::@::@:@:::@#:::: + | :: :@ : @ ::@:@:::@:: :@: :::::@: :@ :@@@:::::@::@:@:::@#:::: + | :: :@::: :@ ::@:@: :@::::@::::::::@:::@::@@@:::::@::@:@:::@#:::: + | :@::::@::: :@:::@:@: :@::::@::::::::@:::@::@@@:::::@::@:@:::@#:::: + | :::::@::::@::: :@:::@:@: :@::::@::::::::@:::@::@@@:::::@::@:@:::@#:::: + | ::: :@::::@::: :@:::@:@: :@::::@::::::::@:::@::@@@:::::@::@:@:::@#:::: + | :::: :@::::@::: :@:::@:@: :@::::@::::::::@:::@::@@@:::::@::@:@:::@#:::: + | :::: :@::::@::: :@:::@:@: :@::::@::::::::@:::@::@@@:::::@::@:@:::@#:::: + | :::: :@::::@::: :@:::@:@: :@::::@::::::::@:::@::@@@:::::@::@:@:::@#:::: + | :::: :@::::@::: :@:::@:@: :@::::@::::::::@:::@::@@@:::::@::@:@:::@#:::: + | :::: :@::::@::: :@:::@:@: 
:@::::@::::::::@:::@::@@@:::::@::@:@:::@#:::: + | :::: :@::::@::: :@:::@:@: :@::::@::::::::@:::@::@@@:::::@::@:@:::@#:::: + 0 +----------------------------------------------------------------------->GB + 0 3.231 + + FF peaks at around 1.4 GB after 12.5 GB worth of allocations, + whereas the Lua implementation arrives at around 45 MB after 3.2 GB + total: + + impl time(B) total(B) useful-heap(B) extra-heap(B) + fontforge 12,496,407,184 1,421,150,144 1,327,888,638 93,261,506 + lua 3,263,698,960 45,478,640 37,231,892 8,246,748 + + Much of the inefficiency of Fontforge is a direct consequence of + having to parse the entire font to extract what essentially boils + down to a couple hundred bytes of metadata per font. Since some + information like design sizes (oh, Adobe!) is stuffed away in + Opentype tables, the vastly more efficient approach of + fontloader.info() proves insufficient for indexing. Thus, we ended + up using fontloader.open() which causes even the character tables + to be parsed, which incidentally are responsible for most of the + allocations during that peak of 1.4 GB measured above, along with + the encodings: + + 20.67% (293,781,048B) 0x6A8F72: SplineCharCreate (splineutil.c:3878) + 09.82% (139,570,318B) 0x618ACD: _FontViewBaseCreate (fontviewbase.c:84) + 08.77% (124,634,384B) 0x6A8FB3: SplineCharCreate (splineutil.c:3885) + 04.53% (64,436,904B) in 80 places, all below massif's threshold (1.00%) + 02.68% (38,071,520B) 0x64E14E: addKernPair (parsettfatt.c:493) + 01.04% (14,735,320B) 0x64DE7D: addPairPos (parsettfatt.c:452) + 39.26% (557,942,484B) 0x64A4E0: PsuedoEncodeUnencoded (parsettf.c:5706) + + What gives? For 2.7 we expect a rougher transition than a year back + due to the complete revamp of the OT loading code. Breakage of + fragile aspects like font and style names has been anticipated and + addressed prior to the 2016 pretest release. 
In contrast to the + earlier approach of letting FF do a complete dump and then harvest + identifiers from the output we now have to coordinate with upstream + as to which fields are actually needed in order to provide a + similarly acceptable name → file lookup. On the bright side, these + things are a lot simpler to fix than the rather tedious work of + having users update their Luatex binary =) --doc]]-- -- cgit v1.2.3 From 9fddbb6e4a043652357a3797364aa2deb15b4efe Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Wed, 13 Apr 2016 08:19:06 +0200 Subject: [db] pull raw name information with getinfo() This facility was added by Hans to accomodate our peculiar requirements: There should be no fallback from prefmodifiers to familyname since that removes valuable information about larger font sets like the Adobe ones. --- src/luaotfload-database.lua | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/luaotfload-database.lua') diff --git a/src/luaotfload-database.lua b/src/luaotfload-database.lua index 0557c1a..0e8f86a 100644 --- a/src/luaotfload-database.lua +++ b/src/luaotfload-database.lua @@ -1303,7 +1303,7 @@ find_closest = function (name, limit) end --- find_closest() local load_font_file = function (filename, subfont) - local rawfont, _msg = read_font_file (filename, subfont) + local rawfont, _msg = read_font_file (filename, subfont, true) if not rawfont then logreport ("log", 1, "db", "ERROR: failed to open %s.", filename) return @@ -1375,9 +1375,15 @@ local names_items = { } local map_english_names = function (metadata) + local namesource + local platformames = metadata.platformnames + if platformnames then + namesource = platformnames.windows or platformnames.macintosh + end + namesource = namesource or metadata local nameinfo = { } for ours, theirs in next, names_items do - nameinfo [ours] = metadata [theirs] + nameinfo [ours] = namesource [theirs] end return nameinfo end @@ -1605,7 +1611,7 @@ t1_fullinfo = function 
(filename, _subfont, location, basename, format) sanitized = sanitize_fontnames ({ fontname = fontname, psname = fullname, - metafamily = family, + metafamily = familyname, familyname = familyname, weight = metadata.weight, --- string identifier prefmodifiers = style, -- cgit v1.2.3 From 9bc4f17b9bf0dcf3a3865ae2eb433818a5c70573 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Thu, 14 Apr 2016 00:02:28 +0200 Subject: [db] fix extraction of name info important for family clustering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … or perhaps more accurately, “megafamily”. For the time being we prefer the “windows” versions of the fonts due to the higher quality of the “typographic family” and “subfamily” fields. Another advantage of the new loader over FF is that we’re even given that choice. --- src/luaotfload-database.lua | 174 +++++++++++++++++++++++++------------------- 1 file changed, 98 insertions(+), 76 deletions(-) (limited to 'src/luaotfload-database.lua') diff --git a/src/luaotfload-database.lua b/src/luaotfload-database.lua index 0e8f86a..fd806e0 100644 --- a/src/luaotfload-database.lua +++ b/src/luaotfload-database.lua @@ -372,7 +372,6 @@ This is a sketch of the luaotfload db: prefmodifiers : string; // sanitized preferred subfamily (names table 14) psname : string; // PostScript name size : (false | float * float * float); // if available, size info from the size table converted from decipoints - splitstyle : string; // style information obtained by splitting the full name at the last dash subfamily : string; // sanitized subfamily (names table 2) subfont : (int | bool); // integer if font is part of a TrueType collection ("ttc") version : string; // font version string @@ -612,6 +611,12 @@ local italic_synonym = { italic = true, } +local bold_synonym = { + bold = true, + black = true, + heavy = true, +} + local style_category = { regular = "r", bold = "b", @@ -1365,19 +1370,50 @@ end --doc]]-- local 
names_items = { - compatfull = "compatiblename", + compatfull = "compatiblefullname", fullname = "fullname", - postscriptname = "fontname", - preffamily = "familyname", - prefmodifiers = "subfamilyname", + postscriptname = "postscriptname", + preffamily = "typographicfamily", + prefmodifiers = "typographicsubfamily", family = "family", subfamily = "subfamily", } local map_english_names = function (metadata) local namesource - local platformames = metadata.platformnames + local platformnames = metadata.platformnames + --[[-- + Hans added the “platformnames” option for us to access parts of + the original name table. The names are unreliable and + completely disorganized, sure, but the Windows variant of the + field often contains the superior information. Case in point: + + ["platformnames"]={ + ["macintosh"]={ + ["compatiblefullname"]="Garamond Premr Pro Smbd It", + ["family"]="Garamond Premier Pro", + ["fullname"]="Garamond Premier Pro Semibold Italic", + ["postscriptname"]="GaramondPremrPro-SmbdIt", + ["subfamily"]="Semibold Italic", + }, + ["windows"]={ + ["family"]="Garamond Premr Pro Smbd", + ["fullname"]="GaramondPremrPro-SmbdIt", + ["postscriptname"]="GaramondPremrPro-SmbdIt", + ["subfamily"]="Italic", + ["typographicfamily"]="Garamond Premier Pro", + ["typographicsubfamily"]="Semibold Italic", + }, + }, + + The essential bit is contained as “typographicfamily” (which we + call for historical reasons the “preferred family”) and the + “subfamily”. Why this is the case, only Adobe knows for + certain. 
+ --]]-- if platformnames then + --inspect(metadata) + --namesource = platformnames.macintosh or platformnames.windows namesource = platformnames.windows or platformnames.macintosh end namesource = namesource or metadata @@ -1531,7 +1567,8 @@ local organize_styledata = function (metadata, rawinfo, info) return { --- see http://www.microsoft.com/typography/OTSPEC/features_pt.htm#size size = get_size_info (rawinfo), - weight = pfminfo and pfminfo.weight or metadata.pfmweight or 400, + pfmweight = pfminfo and pfminfo.weight or metadata.pfmweight or 400, + weight = rawinfo.weight or metadata.weight or "unspecified", split = split_fontname (rawinfo.fontname), width = pfminfo and pfminfo.width or metadata.pfmwidth, italicangle = metadata.italicangle, @@ -1604,7 +1641,6 @@ t1_fullinfo = function (filename, _subfont, location, basename, format) local fullname = metadata.fullname local familyname = metadata.familyname local italicangle = metadata.italicangle - local splitstyle = split_fontname (fontname) local style = "" local weight @@ -1640,7 +1676,6 @@ t1_fullinfo = function (filename, _subfont, location, basename, format) psname = sanitized.fontname, version = metadata.version, size = false, - splitstyle = splitstyle, fontstyle_name = style ~= "" and style or weight, weight = metadata.pfminfo and pfminfo.weight or 400, italicangle = italicangle, @@ -2557,23 +2592,27 @@ generate_filedata = function (mappings) end local pick_style +local pick_fallback_style local check_regular do - local splitfontname = lpeg.splitat "-" - local choose_exact = function (field) --- only clean matches, without guessing if italic_synonym [field] then return "i" end - if field == "bold" then + if stringsub (field, 1, 10) == "bolditalic" + or stringsub (field, 1, 11) == "boldoblique" then + return "bi" + end + + if stringsub (field, 1, 4) == "bold" then return "b" end - if field == "bolditalic" or field == "boldoblique" then - return "bi" + if stringsub (field, 1, 6) == "italic" then + return 
"i" end return false @@ -2581,10 +2620,9 @@ do pick_style = function (fontstyle_name, prefmodifiers, - subfamily, - splitstyle) + subfamily) local style - if fontstyle_name then + if fontstyle_name --[[ff only]] then style = choose_exact (fontstyle_name) end if not style then @@ -2597,9 +2635,9 @@ do return style end - pick_fallback_style = function (italicangle, weight) + pick_fallback_style = function (italicangle, weight, pfmweight) --- more aggressive, but only to determine bold faces - if weight > 500 then --- bold spectrum matches + if pfmweight > 500 or bold_synonym [weight] then --- bold spectrum matches if italicangle == 0 then return tostring (weight) else @@ -2615,23 +2653,29 @@ do check_regular = function (fontstyle_name, prefmodifiers, subfamily, - splitstyle, italicangle, - weight) - - if fontstyle_name then - return regular_synonym [fontstyle_name] - elseif prefmodifiers then - return regular_synonym [prefmodifiers] - elseif subfamily then - return regular_synonym [subfamily] - elseif splitstyle then - return regular_synonym [splitstyle] - elseif italicangle == 0 and weight == 400 then - return true + weight, + pfmweight) + local plausible_weight + --[[-- + This filters out undesirable candidates that specify their + prefmodifiers or subfamily as “regular” but are actually of + “semibold” or other weight—another drawback of the + oversimplifying classification into only three styles (r, i, + b, bi). 
+ --]]-- + + if italicangle == 0 then + if pfmweight == 400 then plausible_weight = true + elseif weight and regular_synonym [weight] then plausible_weight = true end + end + + if plausible_weight then + return fontstyle_name and regular_synonym [fontstyle_name] + or prefmodifiers and regular_synonym [prefmodifiers] + or subfamily and regular_synonym [subfamily] end - - return nil + return false end end @@ -2655,12 +2699,7 @@ local pull_values = function (entry) entry.fontname = info.fontname or metadata.fontname entry.fullname = english.fullname or info.fullname entry.prefmodifiers = english.prefmodifiers - local metafamily = metadata.familyname - local familyname = english.preffamily or english.family - entry.familyname = familyname - if familyname ~= metafamily then - entry.metafamily = metadata.familyname - end + entry.familyname = metadata.familyname or english.preffamily or english.family entry.fontstyle_name = sanitized.fontstyle_name entry.plainname = names.fullname entry.subfamily = english.subfamily @@ -2668,8 +2707,8 @@ local pull_values = function (entry) --- pull style info ... 
entry.italicangle = style.italicangle entry.size = style.size - entry.splitstyle = style.split entry.weight = style.weight + entry.pfmweight = style.pfmweight if config.luaotfload.db.strip == true then entry.file = nil @@ -2726,56 +2765,39 @@ collect_families = function (mappings) local subtable = get_subtable (families, entry) local familyname = entry.familyname - local metafamily = entry.metafamily local fontstyle_name = entry.fontstyle_name local prefmodifiers = entry.prefmodifiers local subfamily = entry.subfamily local weight = entry.weight + local pfmweight = entry.pfmweight local italicangle = entry.italicangle - local splitstyle = entry.splitstyle local modifier = pick_style (fontstyle_name, prefmodifiers, - subfamily, - splitstyle) + subfamily) if not modifier then --- regular, exact only modifier = check_regular (fontstyle_name, prefmodifiers, subfamily, - splitstyle, italicangle, - weight) - end + weight, + pfmweight) + end + --if familyname == "garamondpremierpro" then + --print(entry.fullname, "reg?",modifier, "->",fontstyle_name, + --prefmodifiers, + --subfamily, + --italicangle, + --pfmweight, + --weight) + --end if modifier then add_family (familyname, subtable, modifier, entry) - --- registering the metafamilies is unreliable within the - --- same table as identifiers might interfere with an - --- unmarked style that lacks a metafamily, e.g. - --- - --- iwona condensed regular -> - --- family: iwonacond - --- metafamily: iwona - --- iwona regular -> - --- family: iwona - --- metafamily: ø - --- - --- Both would be registered as under the same family, - --- i.e. “iwona”, and depending on the loading order - --- the query “name:iwona” can resolve to the condensed - --- version instead of the actual unmarked one. The only - --- way around this would be to introduce a separate - --- table for metafamilies and do fallback queries on it. 
- --- At the moment this is not pressing enough to justify - --- further increasing the index size, maybe if need - --- arises from the user side. --- if metafamily and metafamily ~= familyname then --- add_family (metafamily, subtable, modifier, entry) --- end - elseif weight > 500 then -- in bold spectrum - modifier = pick_fallback_style (italicangle, weight) + elseif pfmweight > 500 then -- in bold spectrum + modifier = pick_fallback_style (italicangle, weight, pfmweight) if modifier then add_family (familyname, subtable, modifier, entry) end @@ -2851,7 +2873,7 @@ group_modifiers = function (mappings, families) if modifier == info_modifier then local index = info.index local entry = mappings [index] - local weight = entry.weight + local weight = entry.pfmweight local diff = weight < 700 and 700 - weight or weight - 700 if diff < minimum then minimum = diff @@ -2870,7 +2892,7 @@ group_modifiers = function (mappings, families) local index = info.index local entry = mappings [index] local size = entry.size - if entry.weight == closest then + if entry.pfmweight == closest then if size then entries [#entries + 1] = { size [1], @@ -2933,7 +2955,7 @@ local collect_font_filenames = function () local bisect = config.luaotfload.misc.bisect local max_fonts = config.luaotfload.db.max_fonts --- XXX revisit for lua 5.3 wrt integers - tableappend (filenames, collect_font_filenames_texmf ()) + --tableappend (filenames, collect_font_filenames_texmf ()) tableappend (filenames, collect_font_filenames_system ()) if config.luaotfload.db.scan_local == true then tableappend (filenames, collect_font_filenames_local ()) -- cgit v1.2.3