summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilipp Gesang <phg42.2a@gmail.com>2013-04-27 18:07:44 +0200
committerPhilipp Gesang <phg42.2a@gmail.com>2013-04-27 18:07:44 +0200
commit90c3c46541e8508982254ee43eff8983a0444e66 (patch)
tree2817fd329e2c2d5f690b6be619860e51ce0dbe0d
parenta82850e3121af3f0c2412a9a2d8b442cf44cbac2 (diff)
downloadluaotfload-90c3c46541e8508982254ee43eff8983a0444e66.tar.gz
reworked fonts.conf parser
improvements: * works without ``io.popen()`` * scans attributes (like ``prefix``) as well * abides by xdg config rules * no line-based parsing * parsing and interpreting the results are now distinct steps (-> less files open at one time) * LPeg instead of string pattern matching
-rw-r--r--luaotfload-database.lua310
1 files changed, 217 insertions, 93 deletions
diff --git a/luaotfload-database.lua b/luaotfload-database.lua
index 4980766..edc3a5b 100644
--- a/luaotfload-database.lua
+++ b/luaotfload-database.lua
@@ -6,7 +6,7 @@ if not modules then modules = { } end modules ['luaotfload-database'] = {
license = "GNU GPL v2"
}
---- TODO: if the specification is an absolute filename with a font not in the
+--- TODO: if the specification is an absolute filename with a font not in the
--- database, add the font to the database and load it. There is a small
--- difficulty with the filenames of the TEXMF tree that are referenced as
--- relative paths...
@@ -25,6 +25,8 @@ local kpseexpand_path = kpse.expand_path
local kpseexpand_var = kpse.expand_var
local kpselookup = kpse.lookup
local kpsereadable_file = kpse.readable_file
+local lfsisdir = lfs.isdir
+local lfsisfile = lfs.isfile
local mathabs = math.abs
local mathmin = math.min
local stringfind = string.find
@@ -1154,97 +1156,217 @@ end
in OSFONTDIR.
]]
---- (string -> tab -> tab -> tab)
-read_fonts_conf = function (path, results, passed_paths)
- --[[
- This function parses /etc/fonts/fonts.conf and returns all the dir
- it finds. The code is minimal, please report any error it may
- generate.
-
- TODO fonts.conf are some kind of XML so in theory the following
- is totally inappropriate. Maybe a future version of the
- lualibs will include the lxml-* files from Context so we
- can write something presentable instead.
- ]]
- local fh = ioopen(path)
- passed_paths[#passed_paths+1] = path
- passed_paths_set = tabletohash(passed_paths, true)
- if not fh then
- report("log", 2, "db", "cannot open file %s", path)
- return results
+local read_fonts_conf
+do --- closure for read_fonts_conf()
+
+ local lpeg = require "lpeg"
+
+ local C, Cc, Cf, Cg, Ct
+ = lpeg.C, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Ct
+
+ local P, R, S, lpegmatch
+ = lpeg.P, lpeg.R, lpeg.S, lpeg.match
+
+ local alpha = R("az", "AZ")
+ local digit = R"09"
+ local tag_name = C(alpha^1)
+ local whitespace = S" \n\r\t\v"
+ local ws = whitespace^1
+ local comment = P"<!--" * (1 - P"--")^0 * P"-->"
+
+ ---> header specifica
+ local xml_declaration = P"<?xml" * (1 - P"?>")^0 * P"?>"
+ local xml_doctype = P"<!DOCTYPE" * ws
+ * "fontconfig" * (1 - P">")^0 * P">"
+ local header = xml_declaration^-1
+ * (xml_doctype + comment + ws)^0
+
+ ---> enforce root node
+ local root_start = P"<" * ws^-1 * P"fontconfig" * ws^-1 * P">"
+ local root_stop = P"</" * ws^-1 * P"fontconfig" * ws^-1 * P">"
+
+ local dquote, squote = P[["]], P"'"
+ local xml_namestartchar = S":_" + alpha --- ascii only, funk the rest
+ local xml_namechar = S":._" + alpha + digit
+ local xml_name = ws^-1
+ * C(xml_namestartchar * xml_namechar^0)
+ local xml_attvalue = dquote * C((1 - S[[%&"]])^1) * dquote * ws^-1
+ + squote * C((1 - S[[%&']])^1) * squote * ws^-1
+ local xml_attr = Cg(xml_name * P"=" * xml_attvalue)
+ local xml_attr_list = Cf(Ct"" * xml_attr^1, rawset)
+
+ --[[doc--
+ scan_node creates a parser for a given xml tag.
+ --doc]]--
+ --- string -> bool -> lpeg_t
+ local scan_node = function (tag)
+ --- Node attributes go into a table with the index “attributes”
+ --- (relevant for “prefix="xdg"” and the likes).
+ local p_tag = P(tag)
+ local with_attributes = P"<" * p_tag
+ * Cg(xml_attr_list, "attributes")^-1
+ * ws^-1
+ * P">"
+ local plain = P"<" * p_tag * ws^-1 * P">"
+ local node_start = plain + with_attributes
+ local node_stop = P"</" * p_tag * ws^-1 * P">"
+ --- there is no nesting, the earth is flat ...
+ local node = node_start
+ * Cc(tag) * C(comment + (1 - node_stop)^1)
+ * node_stop
+ return Ct(node) -- returns {string, string [, attributes = { key = val }] }
end
- local incomments = false
- for line in fh:lines() do
- while line and line ~= "" do
- -- spaghetti code... hmmm...
- if incomments then
- local tmp = stringfind(line, '-->') --- wtf?
- if tmp then
- incomments = false
- line = stringsub(line, tmp+3)
- else
- line = nil
+
+ --[[doc--
+ At the moment, the interesting tags are “dir” for
+ directory declarations, and “include” for including
+ further configuration files.
+
+ spec: http://freedesktop.org/software/fontconfig/fontconfig-user.html
+ --doc]]--
+ local include_node = scan_node"include"
+ local dir_node = scan_node"dir"
+
+ local element = dir_node
+ + include_node
+ + comment --> ignore
+ + P(1-root_stop) --> skip byte
+
+ local root = root_start * Ct(element^0) * root_stop
+ local p_cheapxml = header * root
+
+ --lpeg.print(p_cheapxml) ---> 757 rules with v0.10
+
+ --[[doc--
+ fonts_conf_scanner() handles configuration files.
+ It is called on an abolute path to a config file (e.g.
+ /home/luser/.config/fontconfig/fonts.conf) and returns a list
+ of the nodes it managed to extract from the file.
+ --doc]]--
+ --- string -> path list
+ local fonts_conf_scanner = function (path)
+ local fh = ioopen(path, "r")
+ if not fh then
+ report("both", 3, "db", "cannot open fontconfig file %s", path)
+ return
+ end
+ local raw = fh:read"*all"
+ fh:close()
+
+ local confdata = lpegmatch(p_cheapxml, raw)
+ if not confdata then
+ report("both", 3, "db", "cannot scan fontconfig file %s", path)
+ return
+ end
+ return confdata
+ end
+
+ --[[doc--
+ read_fonts_conf_indeed() is called with six arguments; the
+ latter three are tables that represent the state and are
+ always returned.
+ The first three are
+ · the path to the file
+ · the expanded $HOME
+ · the expanded $XDG_CONFIG_DIR
+ --doc]]--
+ --- string -> string -> string -> tab -> tab -> (tab * tab * tab)
+ local read_fonts_conf_indeed
+ read_fonts_conf_indeed = function (start, home, xdg_home,
+ acc, done, dirs_done)
+
+ local paths = fonts_conf_scanner(start)
+ if not paths then --- nothing to do
+ return acc, done, dirs_done
+ end
+
+ for i=1, #paths do
+ local pathobj = paths[i]
+ local kind, path = pathobj[1], pathobj[2]
+ local attributes = pathobj.attributes
+ if attributes and attributes.prefix == "xdg" then
+ --- this prepends the xdg root (usually ~/.config)
+ path = filejoin(xdg_home, path)
+ end
+
+ if kind == "dir" then
+ if stringsub(path, 1, 1) == "~" then
+ path = filejoin(home, stringsub(path, 2))
end
- else
- local tmp = stringfind(line, '<!--')
- local newline = line
- if tmp then
- -- for the analysis, we take everything that is before the
- -- comment sign
- newline = stringsub(line, 1, tmp-1)
- -- and we loop again with the comment
- incomments = true
- line = stringsub(line, tmp+4)
- else
- -- if there is no comment start, the block after that will
- -- end the analysis, we exit the while loop
- line = nil
+ --- We exclude paths with texmf in them, as they should be
+ --- found anyway; also duplicates are ignored by checking
+ --- if they are elements of dirs_done.
+ if not (stringfind(path, "texmf") or dirs_done[path]) then
+ acc[#acc+1] = path
+ dirs_done[path] = true
end
- for dir in stringgmatch(newline, '<dir>([^<]+)</dir>') do
- -- now we need to replace ~ by kpse.expand_path('~')
- if stringsub(dir, 1, 1) == '~' then
- dir = filejoin(kpseexpand_path('~'), stringsub(dir, 2))
- end
- -- we exclude paths with texmf in them, as they should be
- -- found anyway
- if not stringfind(dir, 'texmf') then
- results[#results+1] = dir
- end
+
+ elseif kind == "include" then
+ --- here the path can be four things: a directory or a file,
+ --- in absolute or relative path.
+ if stringsub(path, 1, 1) == "~" then
+ path = filejoin(home, stringsub(path, 2))
+ elseif --- if the path is relative, we make it absolute
+ not ( lfsisfile(path) or lfsisdir(path) )
+ then
+ path = filejoin(filedirname(start), path)
end
- for include in stringgmatch(newline, '<include[^<]*>([^<]+)</include>') do
- -- include here can be four things: a directory or a file,
- -- in absolute or relative path.
- if stringsub(include, 1, 1) == '~' then
- include = filejoin(kpseexpand_path('~'),stringsub(include, 2))
- -- First if the path is relative, we make it absolute:
- elseif not lfs.isfile(include) and not lfs.isdir(include) then
- include = filejoin(file.dirname(path), include)
- end
- if lfs.isfile(include)
- and kpsereadable_file(include)
- and not passed_paths_set[include]
- then
- -- maybe we should prevent loops here?
- -- we exclude path with texmf in them, as they should
- -- be found otherwise
- read_fonts_conf(include, results, passed_paths)
- elseif lfs.isdir(include) then
- for _,f in next, dirglob(filejoin(include, "*.conf")) do
- if not passed_paths_set[f] then
- read_fonts_conf(f, results, passed_paths)
- end
+ if lfsisfile(path)
+ and kpsereadable_file(path)
+ and not done[path]
+ then
+ --- we exclude path with texmf in them, as they should
+ --- be found otherwise
+ acc = read_fonts_conf_indeed(
+ path, home, xdg_home,
+ acc, done, dirs_done)
+ elseif lfsisdir(path) then --- arrow code ahead
+ local config_files = dirglob(filejoin(path, "*.conf"))
+ for _, filename in next, config_files do
+ if not done[filename] then
+ acc = read_fonts_conf_indeed(
+ filename, home, xdg_home,
+ acc, done, dirs_done)
end
end
- end
- end
+ end --- match “kind”
+ end --- iterate paths
end
- end
- fh:close()
- return results
-end
--- for testing purpose
-names.read_fonts_conf = read_fonts_conf
+ --inspect(acc)
+ --inspect(done)
+ return acc, done, dirs_done
+ end --- read_fonts_conf_indeed()
+
+ --[[doc--
+ read_fonts_conf() sets up an accumulator and two sets
+ for tracking what’s been done.
+
+ Also, the environment variables HOME and XDG_CONFIG_HOME --
+ which are constants anyways -- are expanded so don’t have to
+ repeat that over and over again as with the old parser.
+ Now they’re just passed on to every call of
+ read_fonts_conf_indeed().
+
+ read_fonts_conf() is also the only reference visible outside
+ the closure.
+ --doc]]--
+ --- list -> list
+ read_fonts_conf = function (path_list)
+ local home = kpseexpand_path"~" --- could be os.getenv"HOME"
+ local xdg_home = kpseexpand_path"$XDG_CONFIG_HOME"
+ if xdg_home == "" then xdg_home = filejoin(home, ".config") end
+ local acc = { } ---> list: paths collected
+ local done = { } ---> set: files inspected
+ local dirs_done = { } ---> set: dirs in list
+ for i=1, #path_list do --- we keep the state between files
+ acc, done, dirs_done = read_fonts_conf_indeed(
+ path_list[i], home, xdg_home,
+ acc, done, dirs_done)
+ end
+ return acc
+ end
+end --- read_fonts_conf closure
--- TODO stuff those paths into some writable table
local function get_os_dirs()
@@ -1258,14 +1380,12 @@ local function get_os_dirs()
elseif os.type == "windows" or os.type == "msdos" or os.name == "cygwin" then
local windir = os.getenv("WINDIR")
return { filejoin(windir, 'Fonts') }
- else
- local passed_paths = {}
- local os_dirs = {}
- for _,p in next, {"/usr/local/etc/fonts/fonts.conf", "/etc/fonts/fonts.conf"} do
- if lfs.isfile(p) then
- read_fonts_conf(p, os_dirs, passed_paths)
- end
- end
+ else
+ local fonts_conves = { --- plural, much?
+ "/usr/local/etc/fonts/fonts.conf",
+ "/etc/fonts/fonts.conf",
+ }
+ local os_dirs = read_fonts_conf(fonts_conves)
return os_dirs
end
return {}
@@ -1275,7 +1395,8 @@ local function scan_os_fonts(fontnames, newfontnames)
local n_scanned, n_new = 0, 0
--[[
This function scans the OS fonts through
- - fontcache for Unix (reads the fonts.conf file and scans the directories)
+ - fontcache for Unix (reads the fonts.conf file and scans the
+ directories)
- a static set of directories for Windows and MacOSX
]]
report("info", 2, "db", "Scanning OS fonts...")
@@ -1390,6 +1511,9 @@ names.resolve = resolve --- replace the resolver from luatex-fonts
names.resolvespec = resolve
names.find_closest = find_closest
+-- for testing purpose
+names.read_fonts_conf = read_fonts_conf
+
--- dummy required by luatex-fonts (cf. luatex-fonts-syn.lua)
fonts.names.getfilename = function (askedname,suffix) return "" end