diff options
Diffstat (limited to 'luaotfload-parsers.lua')
-rw-r--r-- | luaotfload-parsers.lua | 311 |
1 files changed, 294 insertions, 17 deletions
diff --git a/luaotfload-parsers.lua b/luaotfload-parsers.lua index 89e3bc9..e6db21f 100644 --- a/luaotfload-parsers.lua +++ b/luaotfload-parsers.lua @@ -1,12 +1,12 @@ #!/usr/bin/env texlua ------------------------------------------------------------------------ +------------------------------------------------------------------------------- -- FILE: luaotfload-parsers.lua -- DESCRIPTION: various lpeg-based parsers used in Luaotfload -- REQUIREMENTS: Luaotfload > 2.4 -- AUTHOR: Philipp Gesang (Phg), <phg42.2a@gmail.com> -- VERSION: same as Luaotfload -- CREATED: 2014-01-14 10:15:20+0100 ------------------------------------------------------------------------ +------------------------------------------------------------------------------- -- if not modules then modules = { } end modules ['luaotfload-parsers'] = { @@ -25,7 +25,7 @@ local lpeg = require "lpeg" local P, R, S = lpeg.P, lpeg.R, lpeg.S local lpegmatch = lpeg.match local C, Cc, Cf = lpeg.C, lpeg.Cc, lpeg.Cf -local Cg, Cs, Ct = lpeg.Cg, lpeg.Cs, lpeg.Ct +local Cg, Cmt, Cs, Ct = lpeg.Cg, lpeg.Cmt, lpeg.Cs, lpeg.Ct local kpse = kpse local kpseexpand_path = kpse.expand_path @@ -44,11 +44,39 @@ local report = logs.report local string = string local stringsub = string.sub local stringfind = string.find +local stringlower = string.lower local lfs = lfs local lfsisfile = lfs.isfile local lfsisdir = lfs.isdir +------------------------------------------------------------------------------- +--- COMMON PATTERNS +------------------------------------------------------------------------------- + +local dot = P"." +local colon = P":" +local comma = P"," +local noncomma = 1 - comma +local slash = P"/" +local equals = P"=" +local lbrk, rbrk = P"[", P"]" + +local spacing = S" \t\v" +local linebreak = S"\n\r" +local whitespace = spacing + linebreak +local ws = spacing^0 +local xmlws = whitespace^1 + +local digit = R"09" +local alpha = R("az", "AZ") +local anum = alpha + digit +local decimal = digit^1 * (dot * digit^0)^-1 + +------------------------------------------------------------------------------- +--- FONTCONFIG +------------------------------------------------------------------------------- + --[[doc-- For fonts installed on the operating system, there are several @@ -75,31 +103,27 @@ local lfsisdir = lfs.isdir --doc]]-- -local alpha = R("az", "AZ") -local digit = R"09" local tag_name = C(alpha^1) -local whitespace = S" \n\r\t\v" -local ws = whitespace^1 local comment = P"<!--" * (1 - P"--")^0 * P"-->" ---> header specifica local xml_declaration = P"<?xml" * (1 - P"?>")^0 * P"?>" -local xml_doctype = P"<!DOCTYPE" * ws +local xml_doctype = P"<!DOCTYPE" * xmlws * "fontconfig" * (1 - P">")^0 * P">" local header = xml_declaration^-1 - * (xml_doctype + comment + ws)^0 + * (xml_doctype + comment + xmlws)^0 ---> enforce root node -local root_start = P"<" * ws^-1 * P"fontconfig" * ws^-1 * P">" -local root_stop = P"</" * ws^-1 * P"fontconfig" * ws^-1 * P">" +local root_start = P"<" * xmlws^-1 * P"fontconfig" * xmlws^-1 * P">" +local root_stop = P"</" * xmlws^-1 * P"fontconfig" * xmlws^-1 * P">" local dquote, squote = P[["]], P"'" local xml_namestartchar = S":_" + alpha --- ascii only, funk the rest local xml_namechar = S":._" + alpha + digit -local xml_name = ws^-1 +local xml_name = xmlws^-1 * C(xml_namestartchar * xml_namechar^0) -local xml_attvalue = dquote * C((1 - S[[%&"]])^1) * dquote * ws^-1 - + squote * C((1 - S[[%&']])^1) * squote * ws^-1 +local xml_attvalue = dquote * C((1 - S[[%&"]])^1) * dquote * xmlws^-1 + + squote * C((1 - S[[%&']])^1) * squote * xmlws^-1 local xml_attr = Cg(xml_name * P"=" * xml_attvalue) local xml_attr_list = Cf(Ct"" * xml_attr^1, rawset) @@ -113,11 +137,11 @@ local scan_node = function (tag) local p_tag = P(tag) local with_attributes = P"<" * p_tag * Cg(xml_attr_list, "attributes")^-1 - * ws^-1 + * xmlws^-1 * P">" - local plain = P"<" * p_tag * ws^-1 * P">" + local plain = P"<" * p_tag * xmlws^-1 * P">" local node_start = plain + with_attributes - local node_stop = P"</" * p_tag * ws^-1 * P">" + local node_stop = P"</" * p_tag * xmlws^-1 * P">" --- there is no nesting, the earth is flat ... local node = node_start * Cc(tag) * C(comment + (1 - node_stop)^1) @@ -296,3 +320,256 @@ end luaotfload.parsers.read_fonts_conf = read_fonts_conf + +------------------------------------------------------------------------------- +--- MISC PARSERS +------------------------------------------------------------------------------- + + +local trailingslashes = slash^1 * P(-1) +local stripslashes = C((1 - trailingslashes)^0) +parsers.stripslashes = stripslashes + +local splitcomma = Ct((C(noncomma^1) + comma)^1) +parsers.splitcomma = splitcomma + + + +------------------------------------------------------------------------------- +--- FONT REQUEST +------------------------------------------------------------------------------- + + +--[[doc------------------------------------------------------------------------ + + The luaotfload font request syntax (see manual) + has a canonical form: + + \font<csname>=<prefix>:<identifier>:<features> + + where + <csname> is the control sequence that activates the font + <prefix> is either “file” or “name”, determining the lookup + <identifer> is either a file name (no path) or a font + name, depending on the lookup + <features> is a list of switches or options, separated by + semicolons or commas; a switch is of the form “+” foo + or “-” foo, options are of the form lhs “=” rhs + + however, to ensure backward compatibility we also have + support for Xetex-style requests. + + for the Xetex emulation see: + · The XeTeX Reference Guide by Will Robertson, 2011 + · The XeTeX Companion by Michel Goosens, 2010 + · About XeTeX by Jonathan Kew, 2005 + + + caueat emptor. + + the request is parsed into one of **four** different lookup + categories: the regular ones, file and name, as well as the + Xetex compatibility ones, path and anon. (maybe a better choice + of identifier would be “ambig”.) + + according to my reconstruction, the correct chaining of the + lookups for each category is as follows: + + | File -> ( db/filename lookup ) + + | Name -> ( db/name lookup, + db/filename lookup ) + + | Path -> ( db/filename lookup, + fullpath lookup ) + + | Anon -> ( kpse.find_file(), // <- for tfm, ofm + db/name lookup, + db/filename lookup, + fullpath lookup ) + + caching of successful lookups is essential. we now as of v2.2 + have a lookup cache that is stored in a separate file. it + pertains only to name: lookups, and is described in more detail + in luaotfload-database.lua. + +------------------------------------------------------------------------------- + + One further incompatibility between Xetex and Luatex-Fonts consists + in their option list syntax: apparently, Xetex requires key-value + options to be prefixed by a "+" (ascii “plus”) character. We + silently accept this as well, dropping the first byte if it is a + plus or minus character. + + Reference: https://github.com/lualatex/luaotfload/issues/79#issuecomment-18104483 + +--doc]]------------------------------------------------------------------------ + + +local handle_normal_option = function (key, val) + val = stringlower(val) + --- the former “toboolean()” handler + if val == "true" then + val = true + elseif val == "false" then + val = false + end + return key, val +end + +--[[doc-- + + Xetex style indexing begins at zero which we just increment before + passing it along to the font loader. Ymmv. + +--doc]]-- + +local handle_xetex_option = function (key, val) + val = stringlower(val) + local numeric = tonumber(val) --- decimal only; keeps colors intact + if numeric then --- ugh + if mathceil(numeric) == numeric then -- integer, possible index + val = tostring(numeric + 1) + end + elseif val == "true" then + val = true + elseif val == "false" then + val = false + end + return key, val +end + +--[[doc-- + + Instead of silently ignoring invalid options we emit a warning to + the log. + + Note that we have to return a pair to please rawset(). This creates + an entry on the resulting features hash which will later be removed + during set_default_features(). + +--doc]]-- + +local handle_invalid_option = function (opt) + report("log", 0, "load", "font option %q unknown.", opt) + return "", false +end + +--[[doc-- + + Dirty test if a file: request is actually a path: lookup; don’t + ask! Note this fails on Windows-style absolute paths. These will + *really* have to use the correct request. + +--doc]]-- + +local check_garbage = function (_,i, garbage) + if stringfind(garbage, "/") then + report("log", 0, "load", --- ffs use path! + "warning: path in file: lookups is deprecated; ") + report("log", 0, "load", "use bracket syntax instead!") + report("log", 0, "load", + "position: %d; full match: %q", + i, garbage) + return true + end + return false +end + +local featuresep = comma + +--- modifiers --------------------------------------------------------- +--[[doc-- + The slash notation: called “modifiers” (Kew) or “font options” + (Robertson, Goosens) + we only support the shorthands for italic / bold / bold italic + shapes, as well as setting optical size, the rest is ignored. +--doc]]-- +local style_modifier = (P"BI" + P"IB" + P"bi" + P"ib" + S"biBI") + / stringlower +local size_modifier = S"Ss" * P"=" --- optical size + * Cc"optsize" * C(decimal) +local other_modifier = P"AAT" + P"aat" --- apple stuff; unsupported + + P"ICU" + P"icu" --- not applicable + + P"GR" + P"gr" --- sil stuff; unsupported +local garbage_modifier = ((1 - colon - slash)^0 * Cc(false)) +local modifier = slash * (other_modifier --> ignore + + Cs(style_modifier) --> collect + + Ct(size_modifier) --> collect + + garbage_modifier) --> warn +local modifier_list = Cg(Ct(modifier^0), "modifiers") + +--- lookups ----------------------------------------------------------- +local fontname = C((1-S":(/")^1) --- like luatex-fonts +local unsupported = Cmt((1-S":(")^1, check_garbage) +local prefixed = P"name:" * ws * Cg(fontname, "name") +--- initially we intended file: to emulate the behavior of +--- luatex-fonts, i.e. no paths allowed. after all, we do have XeTeX +--- emulation with the path lookup and it interferes with db lookups. +--- turns out fontspec and other widely used packages rely on file: +--- with paths already, so we’ll add a less strict rule here. anyways, +--- we’ll emit a warning. + + P"file:" * ws * Cg(unsupported, "path") + + P"file:" * ws * Cg(fontname, "file") +--- EXPERIMENTAL: kpse lookup + + P"kpse:" * ws * Cg(fontname, "kpse") +--- EXPERIMENTAL: custom lookup + + P"my:" * ws * Cg(fontname, "my") +local unprefixed = Cg(fontname, "anon") +local path_lookup = lbrk * Cg(C((1-rbrk)^1), "path") * rbrk + +--- features ---------------------------------------------------------- +local field_char = anum + S"+-." --- sic! +local field = field_char^1 +--- assignments are “lhs=rhs” +--- or “+lhs=rhs” (Xetex-style) +--- switches are “+key” | “-key” +local normal_option = C(field) * ws * equals * ws * C(field) * ws +local xetex_option = P"+" * ws * normal_option +local ignore_option = (1 - equals - featuresep)^1 + * equals + * (1 - featuresep)^1 +local assignment = xetex_option / handle_xetex_option + + normal_option / handle_normal_option + + ignore_option / handle_invalid_option +local switch = P"+" * ws * C(field) * Cc(true) + + P"-" * ws * C(field) * Cc(false) + + C(field) * Cc(true) --- default +local feature_expr = ws * Cg(assignment + switch) * ws +local option = feature_expr +local feature_list = Cf(Ct"" + * option + * (featuresep * option^-1)^0 + , rawset) + * featuresep^-1 + +--- other ------------------------------------------------------------- +--- This rule is present in the original parser. It sets the “sub” +--- field of the specification which allows addressing a specific +--- font inside a TTC container. Neither in Luatex-Fonts nor in +--- Luaotfload is this documented, so we might as well silently drop +--- it. However, as backward compatibility is one of our prime goals we +--- just insert it here and leave it undocumented until someone cares +--- to ask. (Note: afair subfonts are numbered, but this rule matches a +--- string; I won’t mess with it though until someone reports a +--- problem.) +--- local subvalue = P("(") * (C(P(1-S("()"))^1)/issub) * P(")") -- for Kim +--- Note to self: subfonts apparently start at index 0. Tested with +--- Cambria.ttc that includes “Cambria Math” at 0 and “Cambria” at 1. +--- Other values cause luatex to segfault. +local subfont = P"(" * Cg((1 - S"()")^1, "sub") * P")" +--- top-level rules --------------------------------------------------- +--- \font\foo=<specification>:<features> +local features = Cg(feature_list, "features") +local specification = (prefixed + unprefixed) + * subfont^-1 + * modifier_list^-1 +local font_request = Ct(path_lookup * (colon^-1 * features)^-1 + + specification * (colon * features)^-1) + +-- lpeg.print(font_request) +--- v2.5 parser: 1065 rules +--- v1.2 parser: 230 rules + +luaotfload.parsers.font_request = font_request + |