summaryrefslogtreecommitdiff
path: root/src/luaotfload-parsers.lua
diff options
context:
space:
mode:
authorPhilipp Gesang <phg42.2a@gmail.com>2014-02-12 07:50:06 +0100
committerPhilipp Gesang <phg42.2a@gmail.com>2014-02-12 07:50:06 +0100
commit9138da7d4a53d65bc15f3a5dc73fd373db40bdf7 (patch)
tree702093c750d81aa2e8810f484627b51d6b485c27 /src/luaotfload-parsers.lua
parentffa5a347f68805e218c61c344c0b8a895c4bb8db (diff)
downloadluaotfload-9138da7d4a53d65bc15f3a5dc73fd373db40bdf7.tar.gz
[*] move source files to ./src
Diffstat (limited to 'src/luaotfload-parsers.lua')
-rw-r--r--src/luaotfload-parsers.lua578
1 files changed, 578 insertions, 0 deletions
diff --git a/src/luaotfload-parsers.lua b/src/luaotfload-parsers.lua
new file mode 100644
index 0000000..1048e1d
--- /dev/null
+++ b/src/luaotfload-parsers.lua
@@ -0,0 +1,578 @@
+#!/usr/bin/env texlua
+-------------------------------------------------------------------------------
+-- FILE: luaotfload-parsers.lua
+-- DESCRIPTION: various lpeg-based parsers used in Luaotfload
+-- REQUIREMENTS: Luaotfload > 2.4
+-- AUTHOR: Philipp Gesang (Phg), <phg42.2a@gmail.com>
+-- VERSION: same as Luaotfload
+-- CREATED: 2014-01-14 10:15:20+0100
+-------------------------------------------------------------------------------
+--
+
+if not modules then modules = { } end modules ['luaotfload-parsers'] = {
+ version = "2.5",
+ comment = "companion to luaotfload-main.lua",
+ author = "Philipp Gesang",
+ copyright = "Luaotfload Development Team",
+ license = "GNU GPL v2.0"
+}
+
+luaotfload = luaotfload or { }
+luaotfload.parsers = luaotfload.parsers or { }
+local parsers = luaotfload.parsers
+
+local lpeg = require "lpeg"
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+local lpegmatch = lpeg.match
+local C, Cc, Cf = lpeg.C, lpeg.Cc, lpeg.Cf
+local Cg, Cmt, Cs, Ct = lpeg.Cg, lpeg.Cmt, lpeg.Cs, lpeg.Ct
+
+local kpse = kpse
+local kpseexpand_path = kpse.expand_path
+local kpsereadable_file = kpse.readable_file
+
+local file = file
+local filejoin = file.join
+local filedirname = file.dirname
+
+local io = io
+local ioopen = io.open
+
+local log = luaotfload.log
+local report = log.report
+
+local string = string
+local stringsub = string.sub
+local stringfind = string.find
+local stringlower = string.lower
+
+local mathceil = math.ceil
+
+local lfs = lfs
+local lfsisfile = lfs.isfile
+local lfsisdir = lfs.isdir
+
+-------------------------------------------------------------------------------
+--- COMMON PATTERNS
+-------------------------------------------------------------------------------
+
+local dot = P"."
+local colon = P":"
+local semicolon = P";"
+local comma = P","
+local noncomma = 1 - comma
+local slash = P"/"
+local equals = P"="
+local lbrk, rbrk = P"[", P"]"
+
+local spacing = S" \t\v"
+local linebreak = S"\n\r"
+local whitespace = spacing + linebreak
+local ws = spacing^0
+local xmlws = whitespace^1
+
+local digit = R"09"
+local alpha = R("az", "AZ")
+local anum = alpha + digit
+local decimal = digit^1 * (dot * digit^0)^-1
+
+-------------------------------------------------------------------------------
+--- FONTCONFIG
+-------------------------------------------------------------------------------
+
+--[[doc--
+
+ For fonts installed on the operating system, there are several
+ options to make Luaotfload index them:
+
+ - If OSFONTDIR is set (which is the case under windows by default
+ but not on the other OSs), it scans it at the same time as the
+ texmf tree, in the function scan_texmf_fonts().
+
+ - Otherwise
+ - under Windows and Mac OSX, we take a look at some hardcoded
+ directories,
+ - under Unix, it reads /etc/fonts/fonts.conf and processes the
+ directories specified there.
+
+ This means that if you have fonts in fancy directories, you need to
+ set them in OSFONTDIR.
+
+ Beware: OSFONTDIR is a kpathsea variable, so fonts found in these
+ paths, though technically system fonts, are registered in the
+ category “texmf”, not “system”. This may have consequences for the
+ lookup order when a font file (or a font with the same name
+ information) is located in both the system and the texmf tree.
+
+--doc]]--
+
+local tag_name = C(alpha^1)
+local comment = P"<!--" * (1 - P"--")^0 * P"-->"
+
+---> header specifica
+local xml_declaration = P"<?xml" * (1 - P"?>")^0 * P"?>"
+local xml_doctype = P"<!DOCTYPE" * xmlws
+ * "fontconfig" * (1 - P">")^0 * P">"
+local header = xml_declaration^-1
+ * (xml_doctype + comment + xmlws)^0
+
+---> enforce root node
+local root_start = P"<" * xmlws^-1 * P"fontconfig" * xmlws^-1 * P">"
+local root_stop = P"</" * xmlws^-1 * P"fontconfig" * xmlws^-1 * P">"
+
+local dquote, squote = P[["]], P"'"
+local xml_namestartchar = S":_" + alpha --- ascii only, funk the rest
+local xml_namechar = S":._" + alpha + digit
+local xml_name = xmlws^-1
+ * C(xml_namestartchar * xml_namechar^0)
+local xml_attvalue = dquote * C((1 - S[[%&"]])^1) * dquote * xmlws^-1
+ + squote * C((1 - S[[%&']])^1) * squote * xmlws^-1
+local xml_attr = Cg(xml_name * P"=" * xml_attvalue)
+local xml_attr_list = Cf(Ct"" * xml_attr^1, rawset)
+
+--[[doc--
+ scan_node creates a parser for a given xml tag.
+--doc]]--
+--- string -> bool -> lpeg_t
+local scan_node = function (tag)
+ --- Node attributes go into a table with the index “attributes”
+ --- (relevant for “prefix="xdg"” and the likes).
+ local p_tag = P(tag)
+ local with_attributes = P"<" * p_tag
+ * Cg(xml_attr_list, "attributes")^-1
+ * xmlws^-1
+ * P">"
+ local plain = P"<" * p_tag * xmlws^-1 * P">"
+ local node_start = plain + with_attributes
+ local node_stop = P"</" * p_tag * xmlws^-1 * P">"
+ --- there is no nesting, the earth is flat ...
+ local node = node_start
+ * Cc(tag) * C(comment + (1 - node_stop)^1)
+ * node_stop
+ return Ct(node) -- returns {string, string [, attributes = { key = val }] }
+end
+
+--[[doc--
+ At the moment, the interesting tags are “dir” for
+ directory declarations, and “include” for including
+ further configuration files.
+
+ spec: http://freedesktop.org/software/fontconfig/fontconfig-user.html
+--doc]]--
+local include_node = scan_node"include"
+local dir_node = scan_node"dir"
+
+local element = dir_node
+ + include_node
+ + comment --> ignore
+ + P(1-root_stop) --> skip byte
+
+local root = root_start * Ct(element^0) * root_stop
+local p_cheapxml = header * root
+
+--lpeg.print(p_cheapxml) ---> 757 rules with v0.10
+
+--[[doc--
+ fonts_conf_scanner() handles configuration files.
+ It is called on an abolute path to a config file (e.g.
+ /home/luser/.config/fontconfig/fonts.conf) and returns a list
+ of the nodes it managed to extract from the file.
+--doc]]--
+--- string -> path list
+local fonts_conf_scanner = function (path)
+ local fh = ioopen(path, "r")
+ if not fh then
+ report("both", 3, "db", "Cannot open fontconfig file %s.", path)
+ return
+ end
+ local raw = fh:read"*all"
+ fh:close()
+
+ local confdata = lpegmatch(p_cheapxml, raw)
+ if not confdata then
+ report("both", 3, "db", "Cannot scan fontconfig file %s.", path)
+ return
+ end
+ return confdata
+end
+
+local p_conf = P".conf" * P(-1)
+local p_filter = (1 - p_conf)^1 * p_conf
+
+local conf_filter = function (path)
+ if lpegmatch (p_filter, path) then
+ return true
+ end
+ return false
+end
+
+--[[doc--
+ read_fonts_conf_indeed() is called with six arguments; the
+ latter three are tables that represent the state and are
+ always returned.
+ The first three are
+ · the path to the file
+ · the expanded $HOME
+ · the expanded $XDG_CONFIG_DIR
+--doc]]--
+--- string -> string -> string -> tab -> tab -> (tab * tab * tab)
+local read_fonts_conf_indeed
+read_fonts_conf_indeed = function (start, home, xdg_home,
+ acc, done, dirs_done,
+ find_files)
+
+ local paths = fonts_conf_scanner(start)
+ if not paths then --- nothing to do
+ return acc, done, dirs_done
+ end
+
+ for i=1, #paths do
+ local pathobj = paths[i]
+ local kind, path = pathobj[1], pathobj[2]
+ local attributes = pathobj.attributes
+ if attributes and attributes.prefix == "xdg" then
+ --- this prepends the xdg root (usually ~/.config)
+ path = filejoin(xdg_home, path)
+ end
+
+ if kind == "dir" then
+ if stringsub(path, 1, 1) == "~" then
+ path = filejoin(home, stringsub(path, 2))
+ end
+ --- We exclude paths with texmf in them, as they should be
+ --- found anyway; also duplicates are ignored by checking
+ --- if they are elements of dirs_done.
+ ---
+ --- FIXME does this mean we cannot access paths from
+ --- distributions (e.g. Context minimals) installed
+ --- separately?
+ if not (stringfind(path, "texmf") or dirs_done[path]) then
+ acc[#acc+1] = path
+ dirs_done[path] = true
+ end
+
+ elseif kind == "include" then
+ --- here the path can be four things: a directory or a file,
+ --- in absolute or relative path.
+ if stringsub(path, 1, 1) == "~" then
+ path = filejoin(home, stringsub(path, 2))
+ elseif --- if the path is relative, we make it absolute
+ not ( lfsisfile(path) or lfsisdir(path) )
+ then
+ path = filejoin(filedirname(start), path)
+ end
+ if lfsisfile(path)
+ and kpsereadable_file(path)
+ and not done[path]
+ then
+ --- we exclude path with texmf in them, as they should
+ --- be found otherwise
+ acc = read_fonts_conf_indeed(
+ path, home, xdg_home,
+ acc, done, dirs_done)
+ elseif lfsisdir(path) then --- arrow code ahead
+ local config_files = find_files (path, conf_filter)
+ for _, filename in next, config_files do
+ if not done[filename] then
+ acc = read_fonts_conf_indeed(
+ filename, home, xdg_home,
+ acc, done, dirs_done)
+ end
+ end
+ end --- match “kind”
+ end --- iterate paths
+ end
+
+ --inspect(acc)
+ --inspect(done)
+ return acc, done, dirs_done
+ end --- read_fonts_conf_indeed()
+
+--[[doc--
+ read_fonts_conf() sets up an accumulator and two sets
+ for tracking what’s been done.
+
+ Also, the environment variables HOME and XDG_CONFIG_HOME --
+ which are constants anyways -- are expanded so don’t have to
+ repeat that over and over again as with the old parser.
+ Now they’re just passed on to every call of
+ read_fonts_conf_indeed().
+
+ read_fonts_conf() is also the only reference visible outside
+ the closure.
+--doc]]--
+
+--- list -> (string -> function option -> string list) -> list
+
+local read_fonts_conf = function (path_list, find_files)
+ local home = kpseexpand_path"~" --- could be os.getenv"HOME"
+ local xdg_home = kpseexpand_path"$XDG_CONFIG_HOME"
+ if xdg_home == "" then xdg_home = filejoin(home, ".config") end
+ local acc = { } ---> list: paths collected
+ local done = { } ---> set: files inspected
+ local dirs_done = { } ---> set: dirs in list
+ for i=1, #path_list do --- we keep the state between files
+ acc, done, dirs_done = read_fonts_conf_indeed(
+ path_list[i], home, xdg_home,
+ acc, done, dirs_done,
+ find_files)
+ end
+ return acc
+end
+
+luaotfload.parsers.read_fonts_conf = read_fonts_conf
+
+
+
+-------------------------------------------------------------------------------
+--- MISC PARSERS
+-------------------------------------------------------------------------------
+
+
+local trailingslashes = slash^1 * P(-1)
+local stripslashes = C((1 - trailingslashes)^0)
+parsers.stripslashes = stripslashes
+
+local splitcomma = Ct((C(noncomma^1) + comma)^1)
+parsers.splitcomma = splitcomma
+
+
+
+-------------------------------------------------------------------------------
+--- FONT REQUEST
+-------------------------------------------------------------------------------
+
+
+--[[doc------------------------------------------------------------------------
+
+ The luaotfload font request syntax (see manual)
+ has a canonical form:
+
+ \font<csname>=<prefix>:<identifier>:<features>
+
+ where
+ <csname> is the control sequence that activates the font
+ <prefix> is either “file” or “name”, determining the lookup
+ <identifer> is either a file name (no path) or a font
+ name, depending on the lookup
+ <features> is a list of switches or options, separated by
+ semicolons or commas; a switch is of the form “+” foo
+ or “-” foo, options are of the form lhs “=” rhs
+
+ however, to ensure backward compatibility we also have
+ support for Xetex-style requests.
+
+ for the Xetex emulation see:
+ · The XeTeX Reference Guide by Will Robertson, 2011
+ · The XeTeX Companion by Michel Goosens, 2010
+ · About XeTeX by Jonathan Kew, 2005
+
+
+ caueat emptor.
+
+ the request is parsed into one of **four** different lookup
+ categories: the regular ones, file and name, as well as the
+ Xetex compatibility ones, path and anon. (maybe a better choice
+ of identifier would be “ambig”.)
+
+ according to my reconstruction, the correct chaining of the
+ lookups for each category is as follows:
+
+ | File -> ( db/filename lookup )
+
+ | Name -> ( db/name lookup,
+ db/filename lookup )
+
+ | Path -> ( db/filename lookup,
+ fullpath lookup )
+
+ | Anon -> ( kpse.find_file(), // <- for tfm, ofm
+ db/name lookup,
+ db/filename lookup,
+ fullpath lookup )
+
+ caching of successful lookups is essential. we now as of v2.2
+ have a lookup cache that is stored in a separate file. it
+ pertains only to name: lookups, and is described in more detail
+ in luaotfload-database.lua.
+
+-------------------------------------------------------------------------------
+
+ One further incompatibility between Xetex and Luatex-Fonts consists
+ in their option list syntax: apparently, Xetex requires key-value
+ options to be prefixed by a "+" (ascii “plus”) character. We
+ silently accept this as well, dropping the first byte if it is a
+ plus or minus character.
+
+ Reference: https://github.com/lualatex/luaotfload/issues/79#issuecomment-18104483
+
+--doc]]------------------------------------------------------------------------
+
+
+local handle_normal_option = function (key, val)
+ val = stringlower(val)
+ --- the former “toboolean()” handler
+ if val == "true" then
+ val = true
+ elseif val == "false" then
+ val = false
+ end
+ return key, val
+end
+
+--[[doc--
+
+ Xetex style indexing begins at zero which we just increment before
+ passing it along to the font loader. Ymmv.
+
+--doc]]--
+
+local handle_xetex_option = function (key, val)
+ val = stringlower(val)
+ local numeric = tonumber(val) --- decimal only; keeps colors intact
+ if numeric then --- ugh
+ if mathceil(numeric) == numeric then -- integer, possible index
+ val = tostring(numeric + 1)
+ end
+ elseif val == "true" then
+ val = true
+ elseif val == "false" then
+ val = false
+ end
+ return key, val
+end
+
+--[[doc--
+
+ Instead of silently ignoring invalid options we emit a warning to
+ the log.
+
+ Note that we have to return a pair to please rawset(). This creates
+ an entry on the resulting features hash which will later be removed
+ during set_default_features().
+
+--doc]]--
+
+local handle_invalid_option = function (opt)
+ report("log", 0, "load", "font option %q unknown.", opt)
+ return "", false
+end
+
+--[[doc--
+
+ Dirty test if a file: request is actually a path: lookup; don’t
+ ask! Note this fails on Windows-style absolute paths. These will
+ *really* have to use the correct request.
+
+--doc]]--
+
+local check_garbage = function (_,i, garbage)
+ if stringfind(garbage, "/") then
+ report("log", 0, "load", --- ffs use path!
+ "warning: path in file: lookups is deprecated; ")
+ report("log", 0, "load", "use bracket syntax instead!")
+ report("log", 0, "load",
+ "position: %d; full match: %q",
+ i, garbage)
+ return true
+ end
+ return false
+end
+
+local featuresep = comma + semicolon
+
+--- modifiers ---------------------------------------------------------
+--[[doc--
+ The slash notation: called “modifiers” (Kew) or “font options”
+ (Robertson, Goosens)
+ we only support the shorthands for italic / bold / bold italic
+ shapes, as well as setting optical size, the rest is ignored.
+--doc]]--
+local style_modifier = (P"BI" + P"IB" + P"bi" + P"ib" + S"biBI")
+ / stringlower
+local size_modifier = S"Ss" * P"=" --- optical size
+ * Cc"optsize" * C(decimal)
+local other_modifier = P"AAT" + P"aat" --- apple stuff; unsupported
+ + P"ICU" + P"icu" --- not applicable
+ + P"GR" + P"gr" --- sil stuff; unsupported
+local garbage_modifier = ((1 - colon - slash)^0 * Cc(false))
+local modifier = slash * (other_modifier --> ignore
+ + Cs(style_modifier) --> collect
+ + Ct(size_modifier) --> collect
+ + garbage_modifier) --> warn
+local modifier_list = Cg(Ct(modifier^0), "modifiers")
+
+--- lookups -----------------------------------------------------------
+local fontname = C((1-S":(/")^1) --- like luatex-fonts
+local unsupported = Cmt((1-S":(")^1, check_garbage)
+local prefixed = P"name:" * ws * Cg(fontname, "name")
+--- initially we intended file: to emulate the behavior of
+--- luatex-fonts, i.e. no paths allowed. after all, we do have XeTeX
+--- emulation with the path lookup and it interferes with db lookups.
+--- turns out fontspec and other widely used packages rely on file:
+--- with paths already, so we’ll add a less strict rule here. anyways,
+--- we’ll emit a warning.
+ + P"file:" * ws * Cg(unsupported, "path")
+ + P"file:" * ws * Cg(fontname, "file")
+--- EXPERIMENTAL: kpse lookup
+ + P"kpse:" * ws * Cg(fontname, "kpse")
+--- EXPERIMENTAL: custom lookup
+ + P"my:" * ws * Cg(fontname, "my")
+local unprefixed = Cg(fontname, "anon")
+local path_lookup = lbrk * Cg(C((1-rbrk)^1), "path") * rbrk
+
+--- features ----------------------------------------------------------
+local field_char = anum + S"+-." --- sic!
+local field = field_char^1
+--- assignments are “lhs=rhs”
+--- or “+lhs=rhs” (Xetex-style)
+--- switches are “+key” | “-key”
+local normal_option = C(field) * ws * equals * ws * C(field) * ws
+local xetex_option = P"+" * ws * normal_option
+local ignore_option = (1 - equals - featuresep)^1
+ * equals
+ * (1 - featuresep)^1
+local assignment = xetex_option / handle_xetex_option
+ + normal_option / handle_normal_option
+ + ignore_option / handle_invalid_option
+local switch = P"+" * ws * C(field) * Cc(true)
+ + P"-" * ws * C(field) * Cc(false)
+ + C(field) * Cc(true) --- default
+local feature_expr = ws * Cg(assignment + switch) * ws
+local option = feature_expr
+local feature_list = Cf(Ct""
+ * option
+ * (featuresep * option^-1)^0
+ , rawset)
+ * featuresep^-1
+
+--- other -------------------------------------------------------------
+--- This rule is present in the original parser. It sets the “sub”
+--- field of the specification which allows addressing a specific
+--- font inside a TTC container. Neither in Luatex-Fonts nor in
+--- Luaotfload is this documented, so we might as well silently drop
+--- it. However, as backward compatibility is one of our prime goals we
+--- just insert it here and leave it undocumented until someone cares
+--- to ask. (Note: afair subfonts are numbered, but this rule matches a
+--- string; I won’t mess with it though until someone reports a
+--- problem.)
+--- local subvalue = P("(") * (C(P(1-S("()"))^1)/issub) * P(")") -- for Kim
+--- Note to self: subfonts apparently start at index 0. Tested with
+--- Cambria.ttc that includes “Cambria Math” at 0 and “Cambria” at 1.
+--- Other values cause luatex to segfault.
+local subfont = P"(" * Cg((1 - S"()")^1, "sub") * P")"
+--- top-level rules ---------------------------------------------------
+--- \font\foo=<specification>:<features>
+local features = Cg(feature_list, "features")
+local specification = (prefixed + unprefixed)
+ * subfont^-1
+ * modifier_list^-1
+local font_request = Ct(path_lookup * (colon^-1 * features)^-1
+ + specification * (colon * features)^-1)
+
+-- lpeg.print(font_request)
+--- v2.5 parser: 1065 rules
+--- v1.2 parser: 230 rules
+
+luaotfload.parsers.font_request = font_request
+