From 9417b2566c7e0ed40fbf216711c45d5e0be36fb7 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Tue, 14 Jan 2014 14:15:51 +0100 Subject: [features,parsers] move font request handler into luaotfload-parsers.lua --- luaotfload-features.lua | 260 ++--------------------------------------------- luaotfload-parsers.lua | 264 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 268 insertions(+), 256 deletions(-) diff --git a/luaotfload-features.lua b/luaotfload-features.lua index dc6b8a4..d786549 100644 --- a/luaotfload-features.lua +++ b/luaotfload-features.lua @@ -9,7 +9,12 @@ if not modules then modules = { } end modules ["features"] = { local type, next = type, next local tonumber = tonumber local tostring = tostring + +local lpeg = require "lpeg" local lpegmatch = lpeg.match +local P = lpeg.P +local R = lpeg.R +local C = lpeg.C ---[[ begin included font-ltx.lua ]] --- this appears to be based in part on luatex-fonts-def.lua @@ -822,259 +827,6 @@ local set_default_features = function (speclist) return speclist end ------------------------------------------------------------------------ ---- request syntax parser 2.2 ------------------------------------------------------------------------ ---- the luaotfload font request syntax (see manual) ---- has a canonical form: ---- ---- \font=:: ---- ---- where ---- is the control sequence that activates the font ---- is either “file” or “name”, determining the lookup ---- is either a file name (no path) or a font ---- name, depending on the lookup ---- is a list of switches or options, separated by ---- semicolons or commas; a switch is of the form “+” foo ---- or “-” foo, options are of the form lhs “=” rhs ---- ---- however, to ensure backward compatibility we also have ---- support for Xetex-style requests. ---- ---- for the Xetex emulation see: ---- · The XeTeX Reference Guide by Will Robertson, 2011 ---- · The XeTeX Companion by Michel Goosens, 2010 ---- · About XeTeX by Jonathan Kew, 2005 ---- ---- ---- caueat emptor. ---- the request is parsed into one of **four** different ---- lookup categories: the regular ones, file and name, ---- as well as the Xetex compatibility ones, path and anon. ---- (maybe a better choice of identifier would be “ambig”.) ---- ---- according to my reconstruction, the correct chaining ---- of the lookups for each category is as follows: ---- ---- | File -> ( db/filename lookup ) ---- ---- | Name -> ( db/name lookup, ---- db/filename lookup ) ---- ---- | Path -> ( db/filename lookup, ---- fullpath lookup ) ---- ---- | Anon -> ( kpse.find_file(), // <- for tfm, ofm ---- db/name lookup, ---- db/filename lookup, ---- fullpath lookup ) ---- ---- caching of successful lookups is essential. we now ---- as of v2.2 have an experimental lookup cache that is ---- stored in a separate file. it pertains only to name: ---- lookups, and is described in more detail in ---- luaotfload-database.lua. ---- ------------------------------------------------------------------------ - ---[[doc-- - - One further incompatibility between Xetex and Luatex-Fonts consists - in their option list syntax: apparently, Xetex requires key-value - options to be prefixed by a "+" (ascii “plus”) character. We - silently accept this as well, dropping the first byte if it is a - plus or minus character. - - Reference: https://github.com/lualatex/luaotfload/issues/79#issuecomment-18104483 - ---doc]]-- - -local handle_normal_option = function (key, val) - val = stringlower(val) - --- the former “toboolean()” handler - if val == "true" then - val = true - elseif val == "false" then - val = false - end - return key, val -end - ---[[doc-- - - Xetex style indexing begins at zero which we just increment before - passing it along to the font loader. Ymmv. - ---doc]]-- - -local handle_xetex_option = function (key, val) - val = stringlower(val) - local numeric = tonumber(val) --- decimal only; keeps colors intact - if numeric then --- ugh - if mathceil(numeric) == numeric then -- integer, possible index - val = tostring(numeric + 1) - end - elseif val == "true" then - val = true - elseif val == "false" then - val = false - end - return key, val -end - ---[[doc-- - - Instead of silently ignoring invalid options we emit a warning to - the log. - - Note that we have to return a pair to please rawset(). This creates - an entry on the resulting features hash which will later be removed - during set_default_features(). - ---doc]]-- - -local handle_invalid_option = function (opt) - report("log", 0, "load", "font option %q unknown.", opt) - return "", false -end - ---[[doc-- - - Dirty test if a file: request is actually a path: lookup; don’t - ask! Note this fails on Windows-style absolute paths. These will - *really* have to use the correct request. - ---doc]]-- - -local check_garbage = function (_,i, garbage) - if stringfind(garbage, "/") then - report("log", 0, "load", --- ffs use path! - "warning: path in file: lookups is deprecated; ") - report("log", 0, "load", "use bracket syntax instead!") - report("log", 0, "load", - "position: %d; full match: %q", - i, garbage) - return true - end - return false -end - -local lpegmatch = lpeg.match -local P, S, R = lpeg.P, lpeg.S, lpeg.R -local C, Cc, Cf, Cg, Cmt, Cs, Ct - = lpeg.C, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cs, lpeg.Ct - ---- terminals and low-level classes ----------------------------------- ---- note we could use the predefined ones from lpeg.patterns -local dot = P"." -local colon = P":" -local featuresep = S",;" -local slash = P"/" -local equals = P"=" -local lbrk, rbrk = P"[", P"]" - -local spacing = S" \t\v" -local ws = spacing^0 - -local digit = R"09" -local alpha = R("az", "AZ") -local anum = alpha + digit -local decimal = digit^1 * (dot * digit^0)^-1 - ---- modifiers --------------------------------------------------------- ---[[doc-- - The slash notation: called “modifiers” (Kew) or “font options” - (Robertson, Goosens) - we only support the shorthands for italic / bold / bold italic - shapes, as well as setting optical size, the rest is ignored. ---doc]]-- -local style_modifier = (P"BI" + P"IB" + P"bi" + P"ib" + S"biBI") - / stringlower -local size_modifier = S"Ss" * P"=" --- optical size - * Cc"optsize" * C(decimal) -local other_modifier = P"AAT" + P"aat" --- apple stuff; unsupported - + P"ICU" + P"icu" --- not applicable - + P"GR" + P"gr" --- sil stuff; unsupported -local garbage_modifier = ((1 - colon - slash)^0 * Cc(false)) -local modifier = slash * (other_modifier --> ignore - + Cs(style_modifier) --> collect - + Ct(size_modifier) --> collect - + garbage_modifier) --> warn -local modifier_list = Cg(Ct(modifier^0), "modifiers") - ---- lookups ----------------------------------------------------------- -local fontname = C((1-S":(/")^1) --- like luatex-fonts -local unsupported = Cmt((1-S":(")^1, check_garbage) -local prefixed = P"name:" * ws * Cg(fontname, "name") ---- initially we intended file: to emulate the behavior of ---- luatex-fonts, i.e. no paths allowed. after all, we do have XeTeX ---- emulation with the path lookup and it interferes with db lookups. ---- turns out fontspec and other widely used packages rely on file: ---- with paths already, so we’ll add a less strict rule here. anyways, ---- we’ll emit a warning. - + P"file:" * ws * Cg(unsupported, "path") - + P"file:" * ws * Cg(fontname, "file") ---- EXPERIMENTAL: kpse lookup - + P"kpse:" * ws * Cg(fontname, "kpse") ---- EXPERIMENTAL: custom lookup - + P"my:" * ws * Cg(fontname, "my") -local unprefixed = Cg(fontname, "anon") -local path_lookup = lbrk * Cg(C((1-rbrk)^1), "path") * rbrk - ---- features ---------------------------------------------------------- -local field_char = anum + S"+-." --- sic! -local field = field_char^1 ---- assignments are “lhs=rhs” ---- or “+lhs=rhs” (Xetex-style) ---- switches are “+key” | “-key” -local normal_option = C(field) * ws * equals * ws * C(field) * ws -local xetex_option = P"+" * ws * normal_option -local ignore_option = (1 - equals - featuresep)^1 - * equals - * (1 - featuresep)^1 -local assignment = xetex_option / handle_xetex_option - + normal_option / handle_normal_option - + ignore_option / handle_invalid_option -local switch = P"+" * ws * C(field) * Cc(true) - + P"-" * ws * C(field) * Cc(false) - + C(field) * Cc(true) --- default -local feature_expr = ws * Cg(assignment + switch) * ws -local option = feature_expr -local feature_list = Cf(Ct"" - * option - * (featuresep * option^-1)^0 - , rawset) - * featuresep^-1 - ---- other ------------------------------------------------------------- ---- This rule is present in the original parser. It sets the “sub” ---- field of the specification which allows addressing a specific ---- font inside a TTC container. Neither in Luatex-Fonts nor in ---- Luaotfload is this documented, so we might as well silently drop ---- it. However, as backward compatibility is one of our prime goals we ---- just insert it here and leave it undocumented until someone cares ---- to ask. (Note: afair subfonts are numbered, but this rule matches a ---- string; I won’t mess with it though until someone reports a ---- problem.) ---- local subvalue = P("(") * (C(P(1-S("()"))^1)/issub) * P(")") -- for Kim ---- Note to self: subfonts apparently start at index 0. Tested with ---- Cambria.ttc that includes “Cambria Math” at 0 and “Cambria” at 1. ---- Other values cause luatex to segfault. -local subfont = P"(" * Cg((1 - S"()")^1, "sub") * P")" ---- top-level rules --------------------------------------------------- ---- \font\foo=: -local features = Cg(feature_list, "features") -local specification = (prefixed + unprefixed) - * subfont^-1 - * modifier_list^-1 -local font_request = Ct(path_lookup * (colon^-1 * features)^-1 - + specification * (colon * features)^-1) - --- lpeg.print(font_request) ---- new parser: 657 rules ---- old parser: 230 rules - local import_values = { --- That’s what the 1.x parser did, not quite as graciously, --- with an array of branch expressions. @@ -1138,7 +890,7 @@ end --- spec -> spec local handle_request = function (specification) - local request = lpegmatch(font_request, + local request = lpegmatch(luaotfload.parsers.font_request, specification.specification) if not request then --- happens when called with an absolute path diff --git a/luaotfload-parsers.lua b/luaotfload-parsers.lua index 0789c42..a989722 100644 --- a/luaotfload-parsers.lua +++ b/luaotfload-parsers.lua @@ -1,12 +1,12 @@ #!/usr/bin/env texlua ------------------------------------------------------------------------ +------------------------------------------------------------------------------- -- FILE: luaotfload-parsers.lua -- DESCRIPTION: various lpeg-based parsers used in Luaotfload -- REQUIREMENTS: Luaotfload > 2.4 -- AUTHOR: Philipp Gesang (Phg), -- VERSION: same as Luaotfload -- CREATED: 2014-01-14 10:15:20+0100 ------------------------------------------------------------------------ +------------------------------------------------------------------------------- -- if not modules then modules = { } end modules ['luaotfload-parsers'] = { @@ -44,6 +44,7 @@ local report = logs.report local string = string local stringsub = string.sub local stringfind = string.find +local stringlower = string.lower local lfs = lfs local lfsisfile = lfs.isfile @@ -316,3 +317,262 @@ local splitcomma = Ct((C(noncomma^1) + comma)^1) parsers.splitcomma = splitcomma + +------------------------------------------------------------------------------- +--- FONT REQUEST +------------------------------------------------------------------------------- + + +--[[doc------------------------------------------------------------------------ + + The luaotfload font request syntax (see manual) + has a canonical form: + + \font=:: + + where + is the control sequence that activates the font + is either “file” or “name”, determining the lookup + is either a file name (no path) or a font + name, depending on the lookup + is a list of switches or options, separated by + semicolons or commas; a switch is of the form “+” foo + or “-” foo, options are of the form lhs “=” rhs + + however, to ensure backward compatibility we also have + support for Xetex-style requests. + + for the Xetex emulation see: + · The XeTeX Reference Guide by Will Robertson, 2011 + · The XeTeX Companion by Michel Goosens, 2010 + · About XeTeX by Jonathan Kew, 2005 + + + caueat emptor. + + the request is parsed into one of **four** different lookup + categories: the regular ones, file and name, as well as the + Xetex compatibility ones, path and anon. (maybe a better choice + of identifier would be “ambig”.) + + according to my reconstruction, the correct chaining of the + lookups for each category is as follows: + + | File -> ( db/filename lookup ) + + | Name -> ( db/name lookup, + db/filename lookup ) + + | Path -> ( db/filename lookup, + fullpath lookup ) + + | Anon -> ( kpse.find_file(), // <- for tfm, ofm + db/name lookup, + db/filename lookup, + fullpath lookup ) + + caching of successful lookups is essential. we now as of v2.2 + have a lookup cache that is stored in a separate file. it + pertains only to name: lookups, and is described in more detail + in luaotfload-database.lua. + +------------------------------------------------------------------------------- + + One further incompatibility between Xetex and Luatex-Fonts consists + in their option list syntax: apparently, Xetex requires key-value + options to be prefixed by a "+" (ascii “plus”) character. We + silently accept this as well, dropping the first byte if it is a + plus or minus character. + + Reference: https://github.com/lualatex/luaotfload/issues/79#issuecomment-18104483 + +--doc]]------------------------------------------------------------------------ + + +local handle_normal_option = function (key, val) + val = stringlower(val) + --- the former “toboolean()” handler + if val == "true" then + val = true + elseif val == "false" then + val = false + end + return key, val +end + +--[[doc-- + + Xetex style indexing begins at zero which we just increment before + passing it along to the font loader. Ymmv. + +--doc]]-- + +local handle_xetex_option = function (key, val) + val = stringlower(val) + local numeric = tonumber(val) --- decimal only; keeps colors intact + if numeric then --- ugh + if mathceil(numeric) == numeric then -- integer, possible index + val = tostring(numeric + 1) + end + elseif val == "true" then + val = true + elseif val == "false" then + val = false + end + return key, val +end + +--[[doc-- + + Instead of silently ignoring invalid options we emit a warning to + the log. + + Note that we have to return a pair to please rawset(). This creates + an entry on the resulting features hash which will later be removed + during set_default_features(). + +--doc]]-- + +local handle_invalid_option = function (opt) + report("log", 0, "load", "font option %q unknown.", opt) + return "", false +end + +--[[doc-- + + Dirty test if a file: request is actually a path: lookup; don’t + ask! Note this fails on Windows-style absolute paths. These will + *really* have to use the correct request. + +--doc]]-- + +local check_garbage = function (_,i, garbage) + if stringfind(garbage, "/") then + report("log", 0, "load", --- ffs use path! + "warning: path in file: lookups is deprecated; ") + report("log", 0, "load", "use bracket syntax instead!") + report("log", 0, "load", + "position: %d; full match: %q", + i, garbage) + return true + end + return false +end + +local lpegmatch = lpeg.match +local P, S, R = lpeg.P, lpeg.S, lpeg.R +local C, Cc, Cf, Cg, Cmt, Cs, Ct + = lpeg.C, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cs, lpeg.Ct + +--- terminals and low-level classes ----------------------------------- +--- note we could use the predefined ones from lpeg.patterns +local dot = P"." +local colon = P":" +local featuresep = S",;" +local slash = P"/" +local equals = P"=" +local lbrk, rbrk = P"[", P"]" + +local spacing = S" \t\v" +local ws = spacing^0 + +local digit = R"09" +local alpha = R("az", "AZ") +local anum = alpha + digit +local decimal = digit^1 * (dot * digit^0)^-1 + +--- modifiers --------------------------------------------------------- +--[[doc-- + The slash notation: called “modifiers” (Kew) or “font options” + (Robertson, Goosens) + we only support the shorthands for italic / bold / bold italic + shapes, as well as setting optical size, the rest is ignored. +--doc]]-- +local style_modifier = (P"BI" + P"IB" + P"bi" + P"ib" + S"biBI") + / stringlower +local size_modifier = S"Ss" * P"=" --- optical size + * Cc"optsize" * C(decimal) +local other_modifier = P"AAT" + P"aat" --- apple stuff; unsupported + + P"ICU" + P"icu" --- not applicable + + P"GR" + P"gr" --- sil stuff; unsupported +local garbage_modifier = ((1 - colon - slash)^0 * Cc(false)) +local modifier = slash * (other_modifier --> ignore + + Cs(style_modifier) --> collect + + Ct(size_modifier) --> collect + + garbage_modifier) --> warn +local modifier_list = Cg(Ct(modifier^0), "modifiers") + +--- lookups ----------------------------------------------------------- +local fontname = C((1-S":(/")^1) --- like luatex-fonts +local unsupported = Cmt((1-S":(")^1, check_garbage) +local prefixed = P"name:" * ws * Cg(fontname, "name") +--- initially we intended file: to emulate the behavior of +--- luatex-fonts, i.e. no paths allowed. after all, we do have XeTeX +--- emulation with the path lookup and it interferes with db lookups. +--- turns out fontspec and other widely used packages rely on file: +--- with paths already, so we’ll add a less strict rule here. anyways, +--- we’ll emit a warning. + + P"file:" * ws * Cg(unsupported, "path") + + P"file:" * ws * Cg(fontname, "file") +--- EXPERIMENTAL: kpse lookup + + P"kpse:" * ws * Cg(fontname, "kpse") +--- EXPERIMENTAL: custom lookup + + P"my:" * ws * Cg(fontname, "my") +local unprefixed = Cg(fontname, "anon") +local path_lookup = lbrk * Cg(C((1-rbrk)^1), "path") * rbrk + +--- features ---------------------------------------------------------- +local field_char = anum + S"+-." --- sic! +local field = field_char^1 +--- assignments are “lhs=rhs” +--- or “+lhs=rhs” (Xetex-style) +--- switches are “+key” | “-key” +local normal_option = C(field) * ws * equals * ws * C(field) * ws +local xetex_option = P"+" * ws * normal_option +local ignore_option = (1 - equals - featuresep)^1 + * equals + * (1 - featuresep)^1 +local assignment = xetex_option / handle_xetex_option + + normal_option / handle_normal_option + + ignore_option / handle_invalid_option +local switch = P"+" * ws * C(field) * Cc(true) + + P"-" * ws * C(field) * Cc(false) + + C(field) * Cc(true) --- default +local feature_expr = ws * Cg(assignment + switch) * ws +local option = feature_expr +local feature_list = Cf(Ct"" + * option + * (featuresep * option^-1)^0 + , rawset) + * featuresep^-1 + +--- other ------------------------------------------------------------- +--- This rule is present in the original parser. It sets the “sub” +--- field of the specification which allows addressing a specific +--- font inside a TTC container. Neither in Luatex-Fonts nor in +--- Luaotfload is this documented, so we might as well silently drop +--- it. However, as backward compatibility is one of our prime goals we +--- just insert it here and leave it undocumented until someone cares +--- to ask. (Note: afair subfonts are numbered, but this rule matches a +--- string; I won’t mess with it though until someone reports a +--- problem.) +--- local subvalue = P("(") * (C(P(1-S("()"))^1)/issub) * P(")") -- for Kim +--- Note to self: subfonts apparently start at index 0. Tested with +--- Cambria.ttc that includes “Cambria Math” at 0 and “Cambria” at 1. +--- Other values cause luatex to segfault. +local subfont = P"(" * Cg((1 - S"()")^1, "sub") * P")" +--- top-level rules --------------------------------------------------- +--- \font\foo=: +local features = Cg(feature_list, "features") +local specification = (prefixed + unprefixed) + * subfont^-1 + * modifier_list^-1 +local font_request = Ct(path_lookup * (colon^-1 * features)^-1 + + specification * (colon * features)^-1) + +-- lpeg.print(font_request) +--- v2.5 parser: 1065 rules +--- v1.2 parser: 230 rules + +luaotfload.parsers.font_request = font_request + -- cgit v1.2.3