summaryrefslogtreecommitdiff
path: root/luaotfload-parsers.lua
diff options
context:
space:
mode:
authorPhilipp Gesang <phg42.2a@gmail.com>2014-02-12 07:50:06 +0100
committerPhilipp Gesang <phg42.2a@gmail.com>2014-02-12 07:50:06 +0100
commit9138da7d4a53d65bc15f3a5dc73fd373db40bdf7 (patch)
tree702093c750d81aa2e8810f484627b51d6b485c27 /luaotfload-parsers.lua
parentffa5a347f68805e218c61c344c0b8a895c4bb8db (diff)
downloadluaotfload-9138da7d4a53d65bc15f3a5dc73fd373db40bdf7.tar.gz
[*] move source files to ./src
Diffstat (limited to 'luaotfload-parsers.lua')
-rw-r--r--luaotfload-parsers.lua578
1 files changed, 0 insertions, 578 deletions
diff --git a/luaotfload-parsers.lua b/luaotfload-parsers.lua
deleted file mode 100644
index 1048e1d..0000000
--- a/luaotfload-parsers.lua
+++ /dev/null
@@ -1,578 +0,0 @@
-#!/usr/bin/env texlua
--------------------------------------------------------------------------------
--- FILE: luaotfload-parsers.lua
--- DESCRIPTION: various lpeg-based parsers used in Luaotfload
--- REQUIREMENTS: Luaotfload > 2.4
--- AUTHOR: Philipp Gesang (Phg), <phg42.2a@gmail.com>
--- VERSION: same as Luaotfload
--- CREATED: 2014-01-14 10:15:20+0100
--------------------------------------------------------------------------------
---
-
---- Record this file's metadata under the key 'luaotfload-parsers' in the
---- global table `modules`; the table is created first if it is missing.
-if not modules then modules = { } end modules ['luaotfload-parsers'] = {
- version = "2.5",
- comment = "companion to luaotfload-main.lua",
- author = "Philipp Gesang",
- copyright = "Luaotfload Development Team",
- license = "GNU GPL v2.0"
-}
-
-luaotfload = luaotfload or { }
-luaotfload.parsers = luaotfload.parsers or { }
-local parsers = luaotfload.parsers
-
-local lpeg = require "lpeg"
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
-local lpegmatch = lpeg.match
-local C, Cc, Cf = lpeg.C, lpeg.Cc, lpeg.Cf
-local Cg, Cmt, Cs, Ct = lpeg.Cg, lpeg.Cmt, lpeg.Cs, lpeg.Ct
-
-local kpse = kpse
-local kpseexpand_path = kpse.expand_path
-local kpsereadable_file = kpse.readable_file
-
-local file = file
-local filejoin = file.join
-local filedirname = file.dirname
-
-local io = io
-local ioopen = io.open
-
-local log = luaotfload.log
-local report = log.report
-
-local string = string
-local stringsub = string.sub
-local stringfind = string.find
-local stringlower = string.lower
-
-local mathceil = math.ceil
-
-local lfs = lfs
-local lfsisfile = lfs.isfile
-local lfsisdir = lfs.isdir
-
--------------------------------------------------------------------------------
---- COMMON PATTERNS
--------------------------------------------------------------------------------
-
---- Single-byte literals shared by the parsers below.
-local dot = P"."
-local colon = P":"
-local semicolon = P";"
-local comma = P","
---- any single byte except a comma
-local noncomma = 1 - comma
-local slash = P"/"
-local equals = P"="
-local lbrk, rbrk = P"[", P"]"
-
---- “spacing” is horizontal whitespace only (space, tab, vertical tab);
---- “whitespace” additionally includes the line break characters.
-local spacing = S" \t\v"
-local linebreak = S"\n\r"
-local whitespace = spacing + linebreak
---- ws: optional run of horizontal whitespace (may match the empty string)
-local ws = spacing^0
---- xmlws: mandatory run of whitespace, line breaks included
-local xmlws = whitespace^1
-
---- ASCII-only character classes.
-local digit = R"09"
-local alpha = R("az", "AZ")
-local anum = alpha + digit
---- One or more digits, optionally followed by a dot and zero or more
---- digits: matches “12”, “12.” and “12.5”, but not “.5”.
-local decimal = digit^1 * (dot * digit^0)^-1
-
--------------------------------------------------------------------------------
---- FONTCONFIG
--------------------------------------------------------------------------------
-
---[[doc--
-
- For fonts installed on the operating system, there are several
- options to make Luaotfload index them:
-
- - If OSFONTDIR is set (which is the case under windows by default
- but not on the other OSs), it scans it at the same time as the
- texmf tree, in the function scan_texmf_fonts().
-
- - Otherwise
- - under Windows and Mac OSX, we take a look at some hardcoded
- directories,
- - under Unix, it reads /etc/fonts/fonts.conf and processes the
- directories specified there.
-
- This means that if you have fonts in fancy directories, you need to
- set them in OSFONTDIR.
-
- Beware: OSFONTDIR is a kpathsea variable, so fonts found in these
- paths, though technically system fonts, are registered in the
- category “texmf”, not “system”. This may have consequences for the
- lookup order when a font file (or a font with the same name
- information) is located in both the system and the texmf tree.
-
---doc]]--
-
---- tag_name captures a run of ASCII letters.
-local tag_name = C(alpha^1)
---- An XML comment: everything after “<!--” up to the first “--”, which
---- must then be followed by “>”.
-local comment = P"<!--" * (1 - P"--")^0 * P"-->"
-
----> header specification
-local xml_declaration = P"<?xml" * (1 - P"?>")^0 * P"?>"
-local xml_doctype = P"<!DOCTYPE" * xmlws
- * "fontconfig" * (1 - P">")^0 * P">"
---- The header is an optional XML declaration followed by any number of
---- doctype declarations, comments and whitespace runs.
-local header = xml_declaration^-1
- * (xml_doctype + comment + xmlws)^0
-
----> enforce root node
-local root_start = P"<" * xmlws^-1 * P"fontconfig" * xmlws^-1 * P">"
-local root_stop = P"</" * xmlws^-1 * P"fontconfig" * xmlws^-1 * P">"
-
-local dquote, squote = P[["]], P"'"
-local xml_namestartchar = S":_" + alpha --- ascii only, funk the rest
-local xml_namechar = S":._" + alpha + digit
---- An attribute name, captured without the optional leading whitespace.
-local xml_name = xmlws^-1
- * C(xml_namestartchar * xml_namechar^0)
---- A non-empty attribute value in double or single quotes. The captured
---- text excludes the quotes; “%”, “&” and the quote character itself are
---- not allowed inside. Trailing whitespace is consumed.
-local xml_attvalue = dquote * C((1 - S[[%&"]])^1) * dquote * xmlws^-1
- + squote * C((1 - S[[%&']])^1) * squote * xmlws^-1
---- One name="value" pair; no whitespace is accepted around the “=”.
-local xml_attr = Cg(xml_name * P"=" * xml_attvalue)
---- Fold one or more name/value pairs into a fresh table via rawset.
-local xml_attr_list = Cf(Ct"" * xml_attr^1, rawset)
-
---[[doc--
- scan_node creates a parser for a given xml tag.
---doc]]--
---- string -> lpeg_t
---- Build a pattern that matches one flat (non-nested) <tag>…</tag> element
---- and yields a table { tag, content [, attributes = { key = val }] }.
-local scan_node = function (tag)
-  local name = P(tag)
-  --- closing tag; also delimits the element content
-  local closing = P"</" * name * xmlws^-1 * P">"
-  --- opening tag without attributes
-  local bare = P"<" * name * xmlws^-1 * P">"
-  --- opening tag whose attributes are collected in the field “attributes”
-  --- (relevant for “prefix="xdg"” and the likes)
-  local decorated = P"<" * name
-                  * Cg(xml_attr_list, "attributes")^-1
-                  * xmlws^-1
-                  * P">"
-  local opening = bare + decorated
-  --- the content is either a single comment or everything up to the
-  --- closing tag; elements are assumed not to nest
-  local content = C(comment + (1 - closing)^1)
-  return Ct(opening * Cc(tag) * content * closing)
-end
-
---[[doc--
- At the moment, the interesting tags are “dir” for
- directory declarations, and “include” for including
- further configuration files.
-
- spec: http://freedesktop.org/software/fontconfig/fontconfig-user.html
---doc]]--
---- Parsers for the two element kinds that are extracted.
-local include_node = scan_node"include"
-local dir_node = scan_node"dir"
-
---- Inside the root element, try in order: a <dir> node, an <include>
---- node, a comment (matched without producing a capture), or any single
---- byte that does not begin the closing root tag (skipped likewise).
-local element = dir_node
- + include_node
- + comment --> ignore
- + P(1-root_stop) --> skip byte
-
---- The node tables produced by dir_node / include_node are collected into
---- one list; the complete document is the header followed by the root.
-local root = root_start * Ct(element^0) * root_stop
-local p_cheapxml = header * root
-
---lpeg.print(p_cheapxml) ---> 757 rules with v0.10
-
---[[doc--
- fonts_conf_scanner() handles configuration files.
- It is called on an absolute path to a config file (e.g.
- /home/luser/.config/fontconfig/fonts.conf) and returns a list
- of the nodes it managed to extract from the file.
---doc]]--
---- string -> node list option
---- Read the file at “path” and run the fontconfig parser over its
---- contents. Yields the list of extracted nodes, or nothing (after
---- emitting a report) if the file cannot be opened or parsed.
-local fonts_conf_scanner = function (path)
-  local handle = ioopen(path, "r")
-  if not handle then
-    report("both", 3, "db", "Cannot open fontconfig file %s.", path)
-    return
-  end
-
-  local content = handle:read"*all"
-  handle:close()
-
-  local nodes = lpegmatch(p_cheapxml, content)
-  if nodes then
-    return nodes
-  end
-  report("both", 3, "db", "Cannot scan fontconfig file %s.", path)
-end
-
---- p_filter accepts strings that end in “.conf” and have at least one
---- byte in front of that suffix.
-local p_conf = P".conf" * P(-1)
-local p_filter = (1 - p_conf)^1 * p_conf
-
---- string -> bool
---- Predicate telling whether “path” names a *.conf file.
-local conf_filter = function (path)
-  return lpegmatch (p_filter, path) and true or false
-end
-
---[[doc--
- read_fonts_conf_indeed() is called with seven arguments. The
- fourth to sixth are tables that represent the state and are
- always returned; the seventh is the function used to list the
- files inside an included directory.
- The first three are
- · the path to the file
- · the expanded $HOME
- · the expanded $XDG_CONFIG_HOME
---doc]]--
---- read_fonts_conf_indeed() processes one fontconfig file and recurses
---- into the files it includes.
----
---- Arguments:
----   start       path of the configuration file to read
----   home        replacement for a leading “~” in paths
----   xdg_home    root prepended to paths of nodes with prefix="xdg"
----   acc         list accumulating the font directories found
----   done        set of configuration files already processed
----   dirs_done   set of directories already present in acc
----   find_files  function (dir, filter) returning a table whose values
----               are the matching file names inside an included
----               directory; it is called with conf_filter
----
---- Returns acc, done, dirs_done; the tables are updated in place.
----
---- string -> string -> string -> tab -> tab -> tab -> function
----   -> (tab * tab * tab)
-local read_fonts_conf_indeed
-read_fonts_conf_indeed = function (start, home, xdg_home,
-                                   acc, done, dirs_done,
-                                   find_files)
-
-  --- Register the file before descending into it. Without this the
-  --- “done” set stays empty, so files included more than once are read
-  --- again and circular includes recurse without bound.
-  done[start] = true
-
-  local paths = fonts_conf_scanner(start)
-  if not paths then --- nothing to do
-    return acc, done, dirs_done
-  end
-
-  for i=1, #paths do
-    local pathobj = paths[i]
-    local kind, path = pathobj[1], pathobj[2]
-    local attributes = pathobj.attributes
-    if attributes and attributes.prefix == "xdg" then
-      --- this prepends the xdg root (usually ~/.config)
-      path = filejoin(xdg_home, path)
-    end
-
-    if kind == "dir" then
-      if stringsub(path, 1, 1) == "~" then
-        path = filejoin(home, stringsub(path, 2))
-      end
-      --- We exclude paths with texmf in them, as they should be
-      --- found anyway; also duplicates are ignored by checking
-      --- if they are elements of dirs_done.
-      ---
-      --- FIXME does this mean we cannot access paths from
-      --- distributions (e.g. Context minimals) installed
-      --- separately?
-      if not (stringfind(path, "texmf") or dirs_done[path]) then
-        acc[#acc+1] = path
-        dirs_done[path] = true
-      end
-
-    elseif kind == "include" then
-      --- here the path can be four things: a directory or a file,
-      --- in absolute or relative path.
-      if stringsub(path, 1, 1) == "~" then
-        path = filejoin(home, stringsub(path, 2))
-      elseif --- if the path is relative, we make it absolute
-        not ( lfsisfile(path) or lfsisdir(path) )
-      then
-        path = filejoin(filedirname(start), path)
-      end
-      if lfsisfile(path)
-        and kpsereadable_file(path)
-        and not done[path]
-      then
-        --- find_files must be handed down, otherwise a directory
-        --- include inside the included file would call nil
-        acc = read_fonts_conf_indeed(
-                path, home, xdg_home,
-                acc, done, dirs_done,
-                find_files)
-      elseif lfsisdir(path) then --- arrow code ahead
-        local config_files = find_files (path, conf_filter)
-        for _, filename in next, config_files do
-          if not done[filename] then
-            acc = read_fonts_conf_indeed(
-                    filename, home, xdg_home,
-                    acc, done, dirs_done,
-                    find_files)
-          end
-        end
-      end --- file or directory include
-    end --- match “kind”
-  end --- iterate paths
-
-  return acc, done, dirs_done
-end --- read_fonts_conf_indeed()
-
---[[doc--
- read_fonts_conf() sets up an accumulator and two sets
- for tracking what’s been done.
-
- Also, the environment variables HOME and XDG_CONFIG_HOME --
- which are constants anyways -- are expanded once so we don’t have
- to repeat that over and over again as with the old parser.
- Now they’re just passed on to every call of
- read_fonts_conf_indeed().
-
- read_fonts_conf() is also the only reference visible outside
- the closure.
---doc]]--
-
---- list -> (string -> function option -> string list) -> list
-
---- Walk the fontconfig files in “path_list” and return the list of font
---- directories they declare. “find_files” is handed through to
---- read_fonts_conf_indeed(), which uses it to list the configuration
---- files of an included directory.
-local read_fonts_conf = function (path_list, find_files)
-  --- state shared between all files: the directories collected so far,
-  --- the set of files inspected and the set of directories collected
-  local collected, seen_files, seen_dirs = { }, { }, { }
-
-  local home = kpseexpand_path"~" --- could be os.getenv"HOME"
-  local xdg_home = kpseexpand_path"$XDG_CONFIG_HOME"
-  if xdg_home == "" then
-    xdg_home = filejoin(home, ".config")
-  end
-
-  for idx = 1, #path_list do
-    collected, seen_files, seen_dirs = read_fonts_conf_indeed(
-                                         path_list[idx], home, xdg_home,
-                                         collected, seen_files, seen_dirs,
-                                         find_files)
-  end
-  return collected
-end
-
-luaotfload.parsers.read_fonts_conf = read_fonts_conf
-
-
-
--------------------------------------------------------------------------------
---- MISC PARSERS
--------------------------------------------------------------------------------
-
-
---- trailingslashes matches a run of slashes that extends to the end of
---- the subject; stripslashes captures everything in front of such a run,
---- i.e. the subject without its trailing slashes.
-local trailingslashes = slash^1 * P(-1)
-local stripslashes = C((1 - trailingslashes)^0)
-parsers.stripslashes = stripslashes
-
---- splitcomma yields a table of the non-empty comma-separated fields;
---- the commas themselves are dropped. The subject must not be empty.
-local splitcomma = Ct((C(noncomma^1) + comma)^1)
-parsers.splitcomma = splitcomma
-
-
-
--------------------------------------------------------------------------------
---- FONT REQUEST
--------------------------------------------------------------------------------
-
-
---[[doc------------------------------------------------------------------------
-
- The luaotfload font request syntax (see manual)
- has a canonical form:
-
- \font<csname>=<prefix>:<identifier>:<features>
-
- where
- <csname> is the control sequence that activates the font
- <prefix> is either “file” or “name”, determining the lookup
- <identifier> is either a file name (no path) or a font
- name, depending on the lookup
- <features> is a list of switches or options, separated by
- semicolons or commas; a switch is of the form “+” foo
- or “-” foo, options are of the form lhs “=” rhs
-
- however, to ensure backward compatibility we also have
- support for Xetex-style requests.
-
- for the Xetex emulation see:
- · The XeTeX Reference Guide by Will Robertson, 2011
- · The XeTeX Companion by Michel Goossens, 2010
- · About XeTeX by Jonathan Kew, 2005
-
-
- caueat emptor.
-
- the request is parsed into one of **four** different lookup
- categories: the regular ones, file and name, as well as the
- Xetex compatibility ones, path and anon. (maybe a better choice
- of identifier would be “ambig”.)
-
- according to my reconstruction, the correct chaining of the
- lookups for each category is as follows:
-
- | File -> ( db/filename lookup )
-
- | Name -> ( db/name lookup,
- db/filename lookup )
-
- | Path -> ( db/filename lookup,
- fullpath lookup )
-
- | Anon -> ( kpse.find_file(), // <- for tfm, ofm
- db/name lookup,
- db/filename lookup,
- fullpath lookup )
-
- caching of successful lookups is essential. we now as of v2.2
- have a lookup cache that is stored in a separate file. it
- pertains only to name: lookups, and is described in more detail
- in luaotfload-database.lua.
-
--------------------------------------------------------------------------------
-
- One further incompatibility between Xetex and Luatex-Fonts consists
- in their option list syntax: apparently, Xetex requires key-value
- options to be prefixed by a "+" (ascii “plus”) character. We
- silently accept this as well, dropping the first byte if it is a
- plus or minus character.
-
- Reference: https://github.com/lualatex/luaotfload/issues/79#issuecomment-18104483
-
---doc]]------------------------------------------------------------------------
-
-
---- Postprocess a “key=value” option: the value is lowercased and the
---- strings “true” / “false” are converted to the respective booleans
---- (the former “toboolean()” handler). The key is passed through.
---- string -> string -> (string * (string | bool))
-local handle_normal_option = function (key, val)
-  local lowered = stringlower(val)
-  if lowered == "true" then
-    return key, true
-  end
-  if lowered == "false" then
-    return key, false
-  end
-  return key, lowered
-end
-
---[[doc--
-
- Xetex style indexing begins at zero which we just increment before
- passing it along to the font loader. Ymmv.
-
---doc]]--
-
---- Postprocess a Xetex-style “+key=value” option. The value is
---- lowercased; integral numbers are treated as zero-based indices and
---- returned incremented by one (as a string), other numbers are left
---- alone. Non-numeric values get the same boolean conversion as regular
---- options.
---- string -> string -> (string * (string | bool))
-local handle_xetex_option = function (key, val)
-  local lowered = stringlower(val)
-  local number = tonumber(lowered) --- decimal only; keeps colors intact
-  if not number then
-    if lowered == "true" then
-      return key, true
-    end
-    if lowered == "false" then
-      return key, false
-    end
-    return key, lowered
-  end
-  if mathceil(number) == number then -- integer, possible index
-    return key, tostring(number + 1)
-  end
-  return key, lowered
-end
-
---[[doc--
-
- Instead of silently ignoring invalid options we emit a warning to
- the log.
-
- Note that we have to return a pair to please rawset(). This creates
- an entry on the resulting features hash which will later be removed
- during set_default_features().
-
---doc]]--
-
---- Report an option that could not be parsed and return a placeholder
---- key/value pair, so that the rawset() fold over the feature list
---- still receives two values.
---- string -> (string * bool)
-local handle_invalid_option = function (opt)
-  local dummy_key, dummy_val = "", false
-  report("log", 0, "load", "font option %q unknown.", opt)
-  return dummy_key, dummy_val
-end
-
---[[doc--
-
- Dirty test if a file: request is actually a path: lookup; don’t
- ask! Note this fails on Windows-style absolute paths. These will
- *really* have to use the correct request.
-
---doc]]--
-
---- Match-time check (used with Cmt): succeeds only if the matched text
---- contains a slash, in which case a deprecation warning is logged.
---- The first argument (the whole subject) is not used.
---- string -> int -> string -> bool
-local check_garbage = function (_, i, garbage)
-  if not stringfind(garbage, "/") then
-    return false
-  end
-  report("log", 0, "load", --- ffs use path!
-         "warning: path in file: lookups is deprecated; ")
-  report("log", 0, "load", "use bracket syntax instead!")
-  report("log", 0, "load",
-         "position: %d; full match: %q",
-         i, garbage)
-  return true
-end
-
---- Features may be separated by either a comma or a semicolon.
-local featuresep = comma + semicolon
-
---- modifiers ---------------------------------------------------------
---[[doc--
- The slash notation: called “modifiers” (Kew) or “font options”
- (Robertson, Goosens)
- we only support the shorthands for italic / bold / bold italic
- shapes, as well as setting optical size, the rest is ignored.
---doc]]--
---- Style shorthand in either case; the matched text is lowercased.
-local style_modifier = (P"BI" + P"IB" + P"bi" + P"ib" + S"biBI")
- / stringlower
---- “S=<decimal>” or “s=<decimal>”; yields the constant "optsize" followed
---- by the matched number text.
-local size_modifier = S"Ss" * P"=" --- optical size
- * Cc"optsize" * C(decimal)
---- Recognized but produces no capture.
-local other_modifier = P"AAT" + P"aat" --- apple stuff; unsupported
- + P"ICU" + P"icu" --- not applicable
- + P"GR" + P"gr" --- sil stuff; unsupported
---- Anything else up to the next colon or slash (possibly nothing at all)
---- yields the constant false. NOTE(review): no warning is emitted by this
---- pattern itself; the false entry is presumably handled by the consumer.
-local garbage_modifier = ((1 - colon - slash)^0 * Cc(false))
---- One modifier: a slash followed by the first alternative that matches.
-local modifier = slash * (other_modifier --> ignore
- + Cs(style_modifier) --> collect
- + Ct(size_modifier) --> collect
- + garbage_modifier) --> warn
---- All modifiers are gathered in a list stored in the field “modifiers”.
-local modifier_list = Cg(Ct(modifier^0), "modifiers")
-
---- lookups -----------------------------------------------------------
---- fontname captures a non-empty run of bytes up to the first colon,
---- opening parenthesis or slash.
-local fontname = C((1-S":(/")^1) --- like luatex-fonts
---- Like fontname but slashes are allowed; check_garbage lets the match
---- succeed only if the text actually contains a slash.
-local unsupported = Cmt((1-S":(")^1, check_garbage)
---- Requests with an explicit prefix; the identifier is stored in the field
---- named after the kind of lookup. Alternatives are tried in order, so a
---- “file:” request is tested for a path before it is taken as a file name.
-local prefixed = P"name:" * ws * Cg(fontname, "name")
---- initially we intended file: to emulate the behavior of
---- luatex-fonts, i.e. no paths allowed. after all, we do have XeTeX
---- emulation with the path lookup and it interferes with db lookups.
---- turns out fontspec and other widely used packages rely on file:
---- with paths already, so we’ll add a less strict rule here. anyways,
---- we’ll emit a warning.
- + P"file:" * ws * Cg(unsupported, "path")
- + P"file:" * ws * Cg(fontname, "file")
---- EXPERIMENTAL: kpse lookup
- + P"kpse:" * ws * Cg(fontname, "kpse")
---- EXPERIMENTAL: custom lookup
- + P"my:" * ws * Cg(fontname, "my")
---- A request without prefix is stored in the field “anon”.
-local unprefixed = Cg(fontname, "anon")
---- Bracket syntax: the non-empty text between “[” and “]” is stored in the
---- field “path”.
-local path_lookup = lbrk * Cg(C((1-rbrk)^1), "path") * rbrk
-
---- features ----------------------------------------------------------
---- A field is a non-empty run of ASCII letters, digits, “+”, “-” and “.”.
-local field_char = anum + S"+-." --- sic!
-local field = field_char^1
---- assignments are “lhs=rhs”
---- or “+lhs=rhs” (Xetex-style)
---- switches are “+key” | “-key”
---- normal_option captures lhs and rhs; horizontal whitespace is accepted
---- around the equals sign and after the rhs.
-local normal_option = C(field) * ws * equals * ws * C(field) * ws
-local xetex_option = P"+" * ws * normal_option
---- Any other text containing an equals sign, up to the next separator.
---- It has no captures, so its handler receives the whole matched text.
-local ignore_option = (1 - equals - featuresep)^1
- * equals
- * (1 - featuresep)^1
---- Ordered choice: the Xetex form is tried first, then the regular form,
---- finally the catch-all. Each handler returns a key/value pair.
-local assignment = xetex_option / handle_xetex_option
- + normal_option / handle_normal_option
- + ignore_option / handle_invalid_option
---- A switch yields the key and a boolean: “+key” and a bare “key” give
---- true, “-key” gives false.
-local switch = P"+" * ws * C(field) * Cc(true)
- + P"-" * ws * C(field) * Cc(false)
- + C(field) * Cc(true) --- default
---- One feature: the key/value pair grouped as a single capture.
-local feature_expr = ws * Cg(assignment + switch) * ws
-local option = feature_expr
---- Fold the key/value pairs into a fresh table via rawset. At least one
---- option is required; after that, separators may be followed by further
---- options or by nothing, so empty entries are tolerated.
-local feature_list = Cf(Ct""
- * option
- * (featuresep * option^-1)^0
- , rawset)
- * featuresep^-1
-
---- other -------------------------------------------------------------
---- This rule is present in the original parser. It sets the “sub”
---- field of the specification which allows addressing a specific
---- font inside a TTC container. Neither in Luatex-Fonts nor in
---- Luaotfload is this documented, so we might as well silently drop
---- it. However, as backward compatibility is one of our prime goals we
---- just insert it here and leave it undocumented until someone cares
---- to ask. (Note: afair subfonts are numbered, but this rule matches a
---- string; I won’t mess with it though until someone reports a
---- problem.)
---- local subvalue = P("(") * (C(P(1-S("()"))^1)/issub) * P(")") -- for Kim
---- Note to self: subfonts apparently start at index 0. Tested with
---- Cambria.ttc that includes “Cambria Math” at 0 and “Cambria” at 1.
---- Other values cause luatex to segfault.
---- The non-empty text between parentheses is stored in the field “sub”.
-local subfont = P"(" * Cg((1 - S"()")^1, "sub") * P")"
---- top-level rules ---------------------------------------------------
---- \font\foo=<specification>:<features>
---- The folded feature table is stored in the field “features”.
-local features = Cg(feature_list, "features")
---- A prefixed or anonymous identifier, optionally followed by a subfont
---- selector and by a modifier list.
-local specification = (prefixed + unprefixed)
- * subfont^-1
- * modifier_list^-1
---- A request is either a bracketed path, after which the colon in front
---- of the features is optional, or a specification, after which the colon
---- is mandatory if features follow. The result is one table holding the
---- named fields set by the rules above.
-local font_request = Ct(path_lookup * (colon^-1 * features)^-1
- + specification * (colon * features)^-1)
-
--- lpeg.print(font_request)
---- v2.5 parser: 1065 rules
---- v1.2 parser: 230 rules
-
-luaotfload.parsers.font_request = font_request
-