summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilipp Gesang <phg42.2a@gmail.com>2014-01-14 14:15:51 +0100
committerPhilipp Gesang <phg42.2a@gmail.com>2014-01-14 14:15:51 +0100
commit9417b2566c7e0ed40fbf216711c45d5e0be36fb7 (patch)
tree191b63e82dd97e455f456ef27666756a4908b433
parentb7a935f81e6d80ff31f92bb75cdf175029524fea (diff)
downloadluaotfload-9417b2566c7e0ed40fbf216711c45d5e0be36fb7.tar.gz
[features,parsers] move font request handler into luaotfload-parsers.lua
-rw-r--r--luaotfload-features.lua260
-rw-r--r--luaotfload-parsers.lua264
2 files changed, 268 insertions, 256 deletions
diff --git a/luaotfload-features.lua b/luaotfload-features.lua
index dc6b8a4..d786549 100644
--- a/luaotfload-features.lua
+++ b/luaotfload-features.lua
@@ -9,7 +9,12 @@ if not modules then modules = { } end modules ["features"] = {
local type, next = type, next
local tonumber = tonumber
local tostring = tostring
+
+local lpeg = require "lpeg"
local lpegmatch = lpeg.match
+local P = lpeg.P
+local R = lpeg.R
+local C = lpeg.C
---[[ begin included font-ltx.lua ]]
--- this appears to be based in part on luatex-fonts-def.lua
@@ -822,259 +827,6 @@ local set_default_features = function (speclist)
return speclist
end
------------------------------------------------------------------------
---- request syntax parser 2.2
------------------------------------------------------------------------
---- the luaotfload font request syntax (see manual)
---- has a canonical form:
----
---- \font<csname>=<prefix>:<identifier>:<features>
----
---- where
---- <csname> is the control sequence that activates the font
---- <prefix> is either “file” or “name”, determining the lookup
---- <identifer> is either a file name (no path) or a font
---- name, depending on the lookup
---- <features> is a list of switches or options, separated by
---- semicolons or commas; a switch is of the form “+” foo
---- or “-” foo, options are of the form lhs “=” rhs
----
---- however, to ensure backward compatibility we also have
---- support for Xetex-style requests.
----
---- for the Xetex emulation see:
---- · The XeTeX Reference Guide by Will Robertson, 2011
---- · The XeTeX Companion by Michel Goosens, 2010
---- · About XeTeX by Jonathan Kew, 2005
----
----
---- caueat emptor.
---- the request is parsed into one of **four** different
---- lookup categories: the regular ones, file and name,
---- as well as the Xetex compatibility ones, path and anon.
---- (maybe a better choice of identifier would be “ambig”.)
----
---- according to my reconstruction, the correct chaining
---- of the lookups for each category is as follows:
----
---- | File -> ( db/filename lookup )
----
---- | Name -> ( db/name lookup,
---- db/filename lookup )
----
---- | Path -> ( db/filename lookup,
---- fullpath lookup )
----
---- | Anon -> ( kpse.find_file(), // <- for tfm, ofm
---- db/name lookup,
---- db/filename lookup,
---- fullpath lookup )
----
---- caching of successful lookups is essential. we now
---- as of v2.2 have an experimental lookup cache that is
---- stored in a separate file. it pertains only to name:
---- lookups, and is described in more detail in
---- luaotfload-database.lua.
----
------------------------------------------------------------------------
-
---[[doc--
-
- One further incompatibility between Xetex and Luatex-Fonts consists
- in their option list syntax: apparently, Xetex requires key-value
- options to be prefixed by a "+" (ascii “plus”) character. We
- silently accept this as well, dropping the first byte if it is a
- plus or minus character.
-
- Reference: https://github.com/lualatex/luaotfload/issues/79#issuecomment-18104483
-
---doc]]--
-
-local handle_normal_option = function (key, val)
- val = stringlower(val)
- --- the former “toboolean()” handler
- if val == "true" then
- val = true
- elseif val == "false" then
- val = false
- end
- return key, val
-end
-
---[[doc--
-
- Xetex style indexing begins at zero which we just increment before
- passing it along to the font loader. Ymmv.
-
---doc]]--
-
-local handle_xetex_option = function (key, val)
- val = stringlower(val)
- local numeric = tonumber(val) --- decimal only; keeps colors intact
- if numeric then --- ugh
- if mathceil(numeric) == numeric then -- integer, possible index
- val = tostring(numeric + 1)
- end
- elseif val == "true" then
- val = true
- elseif val == "false" then
- val = false
- end
- return key, val
-end
-
---[[doc--
-
- Instead of silently ignoring invalid options we emit a warning to
- the log.
-
- Note that we have to return a pair to please rawset(). This creates
- an entry on the resulting features hash which will later be removed
- during set_default_features().
-
---doc]]--
-
-local handle_invalid_option = function (opt)
- report("log", 0, "load", "font option %q unknown.", opt)
- return "", false
-end
-
---[[doc--
-
- Dirty test if a file: request is actually a path: lookup; don’t
- ask! Note this fails on Windows-style absolute paths. These will
- *really* have to use the correct request.
-
---doc]]--
-
-local check_garbage = function (_,i, garbage)
- if stringfind(garbage, "/") then
- report("log", 0, "load", --- ffs use path!
- "warning: path in file: lookups is deprecated; ")
- report("log", 0, "load", "use bracket syntax instead!")
- report("log", 0, "load",
- "position: %d; full match: %q",
- i, garbage)
- return true
- end
- return false
-end
-
-local lpegmatch = lpeg.match
-local P, S, R = lpeg.P, lpeg.S, lpeg.R
-local C, Cc, Cf, Cg, Cmt, Cs, Ct
- = lpeg.C, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cs, lpeg.Ct
-
---- terminals and low-level classes -----------------------------------
---- note we could use the predefined ones from lpeg.patterns
-local dot = P"."
-local colon = P":"
-local featuresep = S",;"
-local slash = P"/"
-local equals = P"="
-local lbrk, rbrk = P"[", P"]"
-
-local spacing = S" \t\v"
-local ws = spacing^0
-
-local digit = R"09"
-local alpha = R("az", "AZ")
-local anum = alpha + digit
-local decimal = digit^1 * (dot * digit^0)^-1
-
---- modifiers ---------------------------------------------------------
---[[doc--
- The slash notation: called “modifiers” (Kew) or “font options”
- (Robertson, Goosens)
- we only support the shorthands for italic / bold / bold italic
- shapes, as well as setting optical size, the rest is ignored.
---doc]]--
-local style_modifier = (P"BI" + P"IB" + P"bi" + P"ib" + S"biBI")
- / stringlower
-local size_modifier = S"Ss" * P"=" --- optical size
- * Cc"optsize" * C(decimal)
-local other_modifier = P"AAT" + P"aat" --- apple stuff; unsupported
- + P"ICU" + P"icu" --- not applicable
- + P"GR" + P"gr" --- sil stuff; unsupported
-local garbage_modifier = ((1 - colon - slash)^0 * Cc(false))
-local modifier = slash * (other_modifier --> ignore
- + Cs(style_modifier) --> collect
- + Ct(size_modifier) --> collect
- + garbage_modifier) --> warn
-local modifier_list = Cg(Ct(modifier^0), "modifiers")
-
---- lookups -----------------------------------------------------------
-local fontname = C((1-S":(/")^1) --- like luatex-fonts
-local unsupported = Cmt((1-S":(")^1, check_garbage)
-local prefixed = P"name:" * ws * Cg(fontname, "name")
---- initially we intended file: to emulate the behavior of
---- luatex-fonts, i.e. no paths allowed. after all, we do have XeTeX
---- emulation with the path lookup and it interferes with db lookups.
---- turns out fontspec and other widely used packages rely on file:
---- with paths already, so we’ll add a less strict rule here. anyways,
---- we’ll emit a warning.
- + P"file:" * ws * Cg(unsupported, "path")
- + P"file:" * ws * Cg(fontname, "file")
---- EXPERIMENTAL: kpse lookup
- + P"kpse:" * ws * Cg(fontname, "kpse")
---- EXPERIMENTAL: custom lookup
- + P"my:" * ws * Cg(fontname, "my")
-local unprefixed = Cg(fontname, "anon")
-local path_lookup = lbrk * Cg(C((1-rbrk)^1), "path") * rbrk
-
---- features ----------------------------------------------------------
-local field_char = anum + S"+-." --- sic!
-local field = field_char^1
---- assignments are “lhs=rhs”
---- or “+lhs=rhs” (Xetex-style)
---- switches are “+key” | “-key”
-local normal_option = C(field) * ws * equals * ws * C(field) * ws
-local xetex_option = P"+" * ws * normal_option
-local ignore_option = (1 - equals - featuresep)^1
- * equals
- * (1 - featuresep)^1
-local assignment = xetex_option / handle_xetex_option
- + normal_option / handle_normal_option
- + ignore_option / handle_invalid_option
-local switch = P"+" * ws * C(field) * Cc(true)
- + P"-" * ws * C(field) * Cc(false)
- + C(field) * Cc(true) --- default
-local feature_expr = ws * Cg(assignment + switch) * ws
-local option = feature_expr
-local feature_list = Cf(Ct""
- * option
- * (featuresep * option^-1)^0
- , rawset)
- * featuresep^-1
-
---- other -------------------------------------------------------------
---- This rule is present in the original parser. It sets the “sub”
---- field of the specification which allows addressing a specific
---- font inside a TTC container. Neither in Luatex-Fonts nor in
---- Luaotfload is this documented, so we might as well silently drop
---- it. However, as backward compatibility is one of our prime goals we
---- just insert it here and leave it undocumented until someone cares
---- to ask. (Note: afair subfonts are numbered, but this rule matches a
---- string; I won’t mess with it though until someone reports a
---- problem.)
---- local subvalue = P("(") * (C(P(1-S("()"))^1)/issub) * P(")") -- for Kim
---- Note to self: subfonts apparently start at index 0. Tested with
---- Cambria.ttc that includes “Cambria Math” at 0 and “Cambria” at 1.
---- Other values cause luatex to segfault.
-local subfont = P"(" * Cg((1 - S"()")^1, "sub") * P")"
---- top-level rules ---------------------------------------------------
---- \font\foo=<specification>:<features>
-local features = Cg(feature_list, "features")
-local specification = (prefixed + unprefixed)
- * subfont^-1
- * modifier_list^-1
-local font_request = Ct(path_lookup * (colon^-1 * features)^-1
- + specification * (colon * features)^-1)
-
--- lpeg.print(font_request)
---- new parser: 657 rules
---- old parser: 230 rules
-
local import_values = {
--- That’s what the 1.x parser did, not quite as graciously,
--- with an array of branch expressions.
@@ -1138,7 +890,7 @@ end
--- spec -> spec
local handle_request = function (specification)
- local request = lpegmatch(font_request,
+ local request = lpegmatch(luaotfload.parsers.font_request,
specification.specification)
if not request then
--- happens when called with an absolute path
diff --git a/luaotfload-parsers.lua b/luaotfload-parsers.lua
index 0789c42..a989722 100644
--- a/luaotfload-parsers.lua
+++ b/luaotfload-parsers.lua
@@ -1,12 +1,12 @@
#!/usr/bin/env texlua
------------------------------------------------------------------------
+-------------------------------------------------------------------------------
-- FILE: luaotfload-parsers.lua
-- DESCRIPTION: various lpeg-based parsers used in Luaotfload
-- REQUIREMENTS: Luaotfload > 2.4
-- AUTHOR: Philipp Gesang (Phg), <phg42.2a@gmail.com>
-- VERSION: same as Luaotfload
-- CREATED: 2014-01-14 10:15:20+0100
------------------------------------------------------------------------
+-------------------------------------------------------------------------------
--
if not modules then modules = { } end modules ['luaotfload-parsers'] = {
@@ -44,6 +44,7 @@ local report = logs.report
local string = string
local stringsub = string.sub
local stringfind = string.find
+local stringlower = string.lower
local lfs = lfs
local lfsisfile = lfs.isfile
@@ -316,3 +317,262 @@ local splitcomma = Ct((C(noncomma^1) + comma)^1)
parsers.splitcomma = splitcomma
+
+-------------------------------------------------------------------------------
+--- FONT REQUEST
+-------------------------------------------------------------------------------
+
+
+--[[doc------------------------------------------------------------------------
+
+ The luaotfload font request syntax (see manual)
+ has a canonical form:
+
+ \font<csname>=<prefix>:<identifier>:<features>
+
+ where
+ <csname> is the control sequence that activates the font
+ <prefix> is either “file” or “name”, determining the lookup
+ <identifier> is either a file name (no path) or a font
+ name, depending on the lookup
+ <features> is a list of switches or options, separated by
+ semicolons or commas; a switch is of the form “+” foo
+ or “-” foo, options are of the form lhs “=” rhs
+
+ however, to ensure backward compatibility we also have
+ support for Xetex-style requests.
+
+ for the Xetex emulation see:
+ · The XeTeX Reference Guide by Will Robertson, 2011
+ · The XeTeX Companion by Michel Goossens, 2010
+ · About XeTeX by Jonathan Kew, 2005
+
+
+ caueat emptor.
+
+ the request is parsed into one of **four** different lookup
+ categories: the regular ones, file and name, as well as the
+ Xetex compatibility ones, path and anon. (maybe a better choice
+ of identifier would be “ambig”.)
+
+ according to my reconstruction, the correct chaining of the
+ lookups for each category is as follows:
+
+ | File -> ( db/filename lookup )
+
+ | Name -> ( db/name lookup,
+ db/filename lookup )
+
+ | Path -> ( db/filename lookup,
+ fullpath lookup )
+
+ | Anon -> ( kpse.find_file(), // <- for tfm, ofm
+ db/name lookup,
+ db/filename lookup,
+ fullpath lookup )
+
+ caching of successful lookups is essential. we now as of v2.2
+ have a lookup cache that is stored in a separate file. it
+ pertains only to name: lookups, and is described in more detail
+ in luaotfload-database.lua.
+
+-------------------------------------------------------------------------------
+
+ One further incompatibility between Xetex and Luatex-Fonts consists
+ in their option list syntax: apparently, Xetex requires key-value
+ options to be prefixed by a "+" (ascii “plus”) character. We
+ silently accept this as well, dropping the first byte if it is a
+ plus or minus character.
+
+ Reference: https://github.com/lualatex/luaotfload/issues/79#issuecomment-18104483
+
+--doc]]------------------------------------------------------------------------
+
+
+--- Post-process a plain “key=value” option.
+--- The value is lowercased; the strings "true" and "false" are turned
+--- into the corresponding booleans, any other value stays a (lowercased)
+--- string. The key is passed through unchanged.
+--- Returns the pair (key, val).
+local handle_normal_option = function (key, val)
+ val = stringlower(val)
+ --- the former “toboolean()” handler
+ if val == "true" then
+ val = true
+ elseif val == "false" then
+ val = false
+ end
+ return key, val
+end
+
+--[[doc--
+
+ Xetex style indexing begins at zero which we just increment before
+ passing it along to the font loader. Ymmv.
+
+--doc]]--
+
+--- Post-process a Xetex-style “+key=value” option.
+--- The value is lowercased first. If it converts to a number and that
+--- number is whole, it is incremented by one and stored back as a
+--- string; a numeric value with a fractional part is left as the
+--- lowercased string. Non-numeric "true" / "false" become booleans.
+--- Returns the pair (key, val).
+--- NOTE(review): “mathceil” is not defined in the lines shown here;
+--- confirm that it is in scope in this file.
+local handle_xetex_option = function (key, val)
+ val = stringlower(val)
+ local numeric = tonumber(val) --- decimal only; keeps colors intact
+ if numeric then --- ugh
+ --- ceil(n) == n holds only for whole numbers
+ if mathceil(numeric) == numeric then -- integer, possible index
+ val = tostring(numeric + 1)
+ end
+ elseif val == "true" then
+ val = true
+ elseif val == "false" then
+ val = false
+ end
+ return key, val
+end
+
+--[[doc--
+
+ Instead of silently ignoring invalid options we emit a warning to
+ the log.
+
+ Note that we have to return a pair to please rawset(). This creates
+ an entry on the resulting features hash which will later be removed
+ during set_default_features().
+
+--doc]]--
+
+--- Report an option that could not be parsed and return a placeholder.
+--- “opt” is written to the log (quoted via %q); the function always
+--- returns the pair ("", false), i.e. an empty-string key with the
+--- value false.
+local handle_invalid_option = function (opt)
+ report("log", 0, "load", "font option %q unknown.", opt)
+ return "", false
+end
+
+--[[doc--
+
+ Dirty test if a file: request is actually a path: lookup; don’t
+ ask! Note this fails on Windows-style absolute paths. These will
+ *really* have to use the correct request.
+
+--doc]]--
+
+--- Match-time check used through Cmt() in the “unsupported” rule.
+--- The first argument is ignored; “i” and “garbage” are only used for
+--- the slash test and for the log output.
+--- Returns true (after logging three messages) if “garbage” contains a
+--- forward slash, false otherwise.
+local check_garbage = function (_,i, garbage)
+ if stringfind(garbage, "/") then
+ report("log", 0, "load", --- ffs use path!
+ "warning: path in file: lookups is deprecated; ")
+ report("log", 0, "load", "use bracket syntax instead!")
+ report("log", 0, "load",
+ "position: %d; full match: %q",
+ i, garbage)
+ return true
+ end
+ --- no slash found: reject
+ return false
+end
+
+--- local shorthands for the lpeg functions used by the rules below
+local lpegmatch = lpeg.match
+local P, S, R = lpeg.P, lpeg.S, lpeg.R
+local C, Cc, Cf, Cg, Cmt, Cs, Ct
+ = lpeg.C, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cs, lpeg.Ct
+
+--- terminals and low-level classes -----------------------------------
+--- note we could use the predefined ones from lpeg.patterns
+local dot = P"."
+local colon = P":"
+--- either a comma or a semicolon
+local featuresep = S",;"
+local slash = P"/"
+local equals = P"="
+local lbrk, rbrk = P"[", P"]"
+
+--- a single space, tab or vertical tab; “ws” is any run of these,
+--- including the empty one
+local spacing = S" \t\v"
+local ws = spacing^0
+
+local digit = R"09"
+local alpha = R("az", "AZ")
+local anum = alpha + digit
+--- one or more digits, optionally followed by a dot and further digits
+--- (the part after the dot may be empty, e.g. “10.”)
+local decimal = digit^1 * (dot * digit^0)^-1
+
+--- modifiers ---------------------------------------------------------
+--[[doc--
+ The slash notation: called “modifiers” (Kew) or “font options”
+ (Robertson, Goossens)
+ we only support the shorthands for italic / bold / bold italic
+ shapes, as well as setting optical size, the rest is ignored.
+--doc]]--
+--- the two-letter shorthands are listed before the single letters so
+--- that they are tried first; the match is passed through stringlower
+local style_modifier = (P"BI" + P"IB" + P"bi" + P"ib" + S"biBI")
+ / stringlower
+--- “S=<decimal>” or “s=<decimal>”; yields the constant "optsize"
+--- followed by the matched number text
+local size_modifier = S"Ss" * P"=" --- optical size
+ * Cc"optsize" * C(decimal)
+local other_modifier = P"AAT" + P"aat" --- apple stuff; unsupported
+ + P"ICU" + P"icu" --- not applicable
+ + P"GR" + P"gr" --- sil stuff; unsupported
+--- anything else up to the next colon or slash (possibly nothing);
+--- yields the constant false
+local garbage_modifier = ((1 - colon - slash)^0 * Cc(false))
+--- a modifier is a slash followed by the first alternative that matches
+local modifier = slash * (other_modifier --> ignore
+ + Cs(style_modifier) --> collect
+ + Ct(size_modifier) --> collect
+ + garbage_modifier) --> warn
+--- all modifier results are gathered in one table, grouped under the
+--- name “modifiers”
+local modifier_list = Cg(Ct(modifier^0), "modifiers")
+
+--- lookups -----------------------------------------------------------
+--- a font name is one or more characters other than colon, opening
+--- parenthesis and slash
+local fontname = C((1-S":(/")^1) --- like luatex-fonts
+--- same as above but slashes are allowed; check_garbage decides at
+--- match time whether the rule succeeds (it does only if a slash is
+--- present)
+local unsupported = Cmt((1-S":(")^1, check_garbage)
+--- the alternatives below are tried in the order written; each one
+--- stores its identifier under the group name given as second
+--- argument to Cg()
+local prefixed = P"name:" * ws * Cg(fontname, "name")
+--- initially we intended file: to emulate the behavior of
+--- luatex-fonts, i.e. no paths allowed. after all, we do have XeTeX
+--- emulation with the path lookup and it interferes with db lookups.
+--- turns out fontspec and other widely used packages rely on file:
+--- with paths already, so we’ll add a less strict rule here. anyways,
+--- we’ll emit a warning.
+ + P"file:" * ws * Cg(unsupported, "path")
+ + P"file:" * ws * Cg(fontname, "file")
+--- EXPERIMENTAL: kpse lookup
+ + P"kpse:" * ws * Cg(fontname, "kpse")
+--- EXPERIMENTAL: custom lookup
+ + P"my:" * ws * Cg(fontname, "my")
+--- a request without any of the prefixes above is stored as “anon”
+local unprefixed = Cg(fontname, "anon")
+--- bracket syntax: everything between “[” and the next “]” is stored
+--- as “path”
+local path_lookup = lbrk * Cg(C((1-rbrk)^1), "path") * rbrk
+
+--- features ----------------------------------------------------------
+local field_char = anum + S"+-." --- sic!
+local field = field_char^1
+--- assignments are “lhs=rhs”
+--- or “+lhs=rhs” (Xetex-style)
+--- switches are “+key” | “-key”
+local normal_option = C(field) * ws * equals * ws * C(field) * ws
+local xetex_option = P"+" * ws * normal_option
+--- fallback for anything else that contains an equals sign: text
+--- without “=” or a separator, then “=”, then text up to the next
+--- separator
+local ignore_option = (1 - equals - featuresep)^1
+ * equals
+ * (1 - featuresep)^1
+--- ordered choice: the Xetex form is tried before the plain form, the
+--- fallback comes last; each result is passed to its handler
+local assignment = xetex_option / handle_xetex_option
+ + normal_option / handle_normal_option
+ + ignore_option / handle_invalid_option
+--- a switch yields the field text plus a constant boolean: false for
+--- a leading “-”, true otherwise
+local switch = P"+" * ws * C(field) * Cc(true)
+ + P"-" * ws * C(field) * Cc(false)
+ + C(field) * Cc(true) --- default
+local feature_expr = ws * Cg(assignment + switch) * ws
+local option = feature_expr
+--- folds the grouped pairs into a new table by means of rawset; an
+--- option is required first, after that the text between two
+--- separators may be empty, and one trailing separator is accepted
+local feature_list = Cf(Ct""
+ * option
+ * (featuresep * option^-1)^0
+ , rawset)
+ * featuresep^-1
+
+--- other -------------------------------------------------------------
+--- This rule is present in the original parser. It sets the “sub”
+--- field of the specification which allows addressing a specific
+--- font inside a TTC container. Neither in Luatex-Fonts nor in
+--- Luaotfload is this documented, so we might as well silently drop
+--- it. However, as backward compatibility is one of our prime goals we
+--- just insert it here and leave it undocumented until someone cares
+--- to ask. (Note: afair subfonts are numbered, but this rule matches a
+--- string; I won’t mess with it though until someone reports a
+--- problem.)
+--- local subvalue = P("(") * (C(P(1-S("()"))^1)/issub) * P(")") -- for Kim
+--- Note to self: subfonts apparently start at index 0. Tested with
+--- Cambria.ttc that includes “Cambria Math” at 0 and “Cambria” at 1.
+--- Other values cause luatex to segfault.
+--- parenthesized text without nested parentheses, stored as “sub”
+local subfont = P"(" * Cg((1 - S"()")^1, "sub") * P")"
+--- top-level rules ---------------------------------------------------
+--- \font\foo=<specification>:<features>
+local features = Cg(feature_list, "features")
+--- a prefixed or unprefixed lookup, optionally followed by a subfont
+--- and by a modifier list
+local specification = (prefixed + unprefixed)
+ * subfont^-1
+ * modifier_list^-1
+--- the result is a single table. two forms are accepted: a bracketed
+--- path, where the colon in front of the features is optional, or a
+--- specification, where the features must be introduced by a colon.
+--- the feature part itself is optional in both forms.
+local font_request = Ct(path_lookup * (colon^-1 * features)^-1
+ + specification * (colon * features)^-1)
+
+-- lpeg.print(font_request)
+--- v2.5 parser: 1065 rules
+--- v1.2 parser: 230 rules
+
+--- NOTE(review): the splitcomma export earlier in this file goes through
+--- the local “parsers” table; presumably that is the same table as
+--- luaotfload.parsers -- confirm.
+luaotfload.parsers.font_request = font_request
+