summaryrefslogtreecommitdiff
path: root/tex/context/base/font-otr.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/font-otr.lua')
-rw-r--r--tex/context/base/font-otr.lua1816
1 files changed, 1816 insertions, 0 deletions
diff --git a/tex/context/base/font-otr.lua b/tex/context/base/font-otr.lua
new file mode 100644
index 000000000..a83766f85
--- /dev/null
+++ b/tex/context/base/font-otr.lua
@@ -0,0 +1,1816 @@
+if not modules then modules = { } end modules ['font-otr'] = {
+ version = 1.001,
+ comment = "companion to font-ini.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- this code is not yet ready for generic i.e. i want to be free to change the
+-- keys and values
+
+-- we can optimize kern pairs (i.e. simple h only positioning) later if we want
+-- which is easier as then we know if we have clashes between features
+--
+-- When looking into a cid font relates issue in the ff library I wondered if
+-- it made sense to use Lua to filter the information from the otf and ttf
+-- files. Quite some ff code relates to special fonts and in practice we only
+-- use rather normal opentype fonts.
+--
+-- The code here is based on the documentation (and examples) at the microsoft
+-- website. The code will be extended and improved stepwise. We generate a table
+-- that is comparable with the one luatex creates but also can create one for
+-- context directly.
+--
+-- todo: add checks for versions
+-- todo: check all unsigned / signed
+-- todo: save mode for context font loader (also deal with unicode dups)
+--
+-- widths and weights are kind of messy: for instance lmmonolt has a pfmweight of
+-- 400 while it should be 300
+--
+-- we can have a bit more in the info data if needed as it will not really slow
+-- down identifying
+--
+-- the main loader is not yet for production use (work in progress on the dsp file)
+-- but as soon as we're done i will also adapt that table (as there is no need to
+-- be completely ff compatible)
+
+-- Make sure the character data tables are available when this file is
+-- used outside a full ConTeXt run (they define the global 'characters').
+if not characters then
+    require("char-def")
+    require("char-ini")
+end
+
+-- Localized standard functions and helpers (cheaper access in hot loops).
+local next, type, unpack = next, type, unpack
+local byte, lower, char = string.byte, string.lower, string.char
+local bittest = bit32.btest
+local concat, remove = table.concat, table.remove
+-- NOTE(review): math.mod and math.round are not part of stock Lua; they are
+-- provided by the ConTeXt runtime -- confirm before porting elsewhere.
+local floor, mod, abs, sqrt, round = math.floor, math.mod, math.abs, math.sqrt, math.round
+-- NOTE(review): C appears twice in this destructuring; harmless but redundant.
+local P, C, R, S, C, Cs, Cc, Ct, Carg, Cmt = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Carg, lpeg.Cmt
+local lpegmatch = lpeg.match
+
+-- ConTeXt utility helpers.
+local setmetatableindex = table.setmetatableindex
+local formatters = string.formatters
+local sortedkeys = table.sortedkeys
+local sortedhash = table.sortedhash
+local stripstring = string.strip
+local utf16_to_utf8_be = utf.utf16_to_utf8_be
+
+local report = logs.reporter("otf reader")
+
+-- Namespace setup: all table readers end up in fonts.handlers.otf.readers.
+fonts = fonts or { }
+local handlers = fonts.handlers or { }
+fonts.handlers = handlers
+local otf = handlers.otf or { }
+handlers.otf = otf
+local readers = otf.readers or { }
+otf.readers = readers
+
+local files = utilities.files
+
+-- Low level big endian binary readers (sfnt container byte order).
+local readbytes = files.readbytes
+local readstring = files.readstring
+local readbyte = files.readcardinal1 -- 8-bit unsigned integer
+local readushort = files.readcardinal2 -- 16-bit unsigned integer
+local readuint = files.readcardinal3 -- 24-bit unsigned integer
+local readulong = files.readcardinal4 -- 32-bit unsigned integer
+local readchar = files.readinteger1 -- 8-bit signed integer
+local readshort = files.readinteger2 -- 16-bit signed integer
+local readlong = files.readinteger4 -- 32-bit signed integer
+local readfixed = files.readfixed4
+local readfword = readshort -- 16-bit signed integer that describes a quantity in FUnits
+local readufword = readushort -- 16-bit unsigned integer that describes a quantity in FUnits
+local readoffset = readushort
+local read2dot14 = files.read2dot14 -- 16-bit signed fixed number with the low 14 bits of fraction (2.14) (F2DOT14)
+
+-- A tag is four raw bytes; skipshort skips n (default 1) 16-bit values.
+local readtag = function(f) return f:read(4) end
+local skipshort = function(f,n) f:read(n and 2*n or 2) end
+
+-- Remember which skipped tables were already reported so each one is
+-- mentioned at most once per run.
+local reportedskipped = { }
+
+local function reportskippedtable(tag)
+    if reportedskipped[tag] then
+        return
+    end
+    report("loading of table %a skipped (reported once only)",tag)
+    reportedskipped[tag] = true
+end
+-- date represented in number of seconds since 12:00 midnight, January 1, 1904. The value is represented as a
+-- signed 64-bit integer
+
+-- Read a LONGDATETIME: seconds since 12:00 midnight, January 1, 1904,
+-- stored as a signed 64-bit integer. We only use the low five bytes
+-- (the high three stay zero until far in the future).
+local function readlongdatetime(f)
+    -- The original destructuring reused 'f' for the sixth byte, shadowing
+    -- the file handle parameter; distinct names avoid that trap.
+    local b1, b2, b3, b4, b5, b6, b7, b8 = byte(f:read(8),1,8)
+    return 0x100000000 * b4 + 0x1000000 * b5 + 0x10000 * b6 + 0x100 * b7 + b8
+end
+
+-- We have quite some data tables. We are somewhat ff compatible with names but as I used
+-- the information from the microsoft site there can be differences. Eventually I might end
+-- up with a different ordering and naming.
+
+-- Name IDs as defined in the spec; the (0 based) array index is the
+-- numeric id stored in the font, so the order must not be changed.
+local reservednames = { [0] =
+    "copyright",
+    "family",
+    "subfamily",
+    "uniqueid",
+    "fullname",
+    "version",
+    "postscriptname",
+    "trademark",
+    "manufacturer",
+    "designer",
+    "description", -- descriptor in ff
+    "venderurl", -- [sic] misspelled key kept as-is; consumers may rely on it
+    "designerurl",
+    "license",
+    "licenseurl",
+    "reserved",
+    "typographicfamily", -- preffamilyname
+    "typographicsubfamily", -- prefmodifiers
+    "compatiblefullname", -- for mac
+    "sampletext",
+    "cidfindfontname",
+    "wwsfamily",
+    "wwssubfamily",
+    "lightbackgroundpalette",
+    "darkbackgroundpalette",
+}
+
+-- more at: https://www.microsoft.com/typography/otspec/name.htm
+
+-- setmetatableindex(reservednames,function(t,k)
+-- local v = "name_" .. k
+-- t[k] = v
+-- return v
+-- end)
+
+-- Platform IDs from the name/cmap tables (0 based index = numeric id).
+local platforms = { [0] =
+    "unicode",
+    "macintosh",
+    "iso",
+    "windows",
+    "custom",
+}
+
+-- Encoding IDs per platform (0 based index = numeric id stored in the font).
+local encodings = {
+    unicode = { [0] =
+        "unicode 1.0 semantics",
+        "unicode 1.1 semantics",
+        "iso/iec 10646",
+        "unicode 2.0 bmp", -- cmap subtable formats 0, 4, 6
+        "unicode 2.0 full", -- cmap subtable formats 0, 4, 6, 10, 12
+        "unicode variation sequences", -- cmap subtable format 14).
+        "unicode full repertoire", -- cmap subtable formats 0, 4, 6, 10, 12, 13
+    },
+    macintosh = { [0] =
+        "roman", "japanese", "chinese (traditional)", "korean", "arabic", "hebrew", "greek", "russian",
+        "rsymbol", "devanagari", "gurmukhi", "gujarati", "oriya", "bengali", "tamil", "telugu", "kannada",
+        "malayalam", "sinhalese", "burmese", "khmer", "thai", "laotian", "georgian", "armenian",
+        "chinese (simplified)", "tibetan", "mongolian", "geez", "slavic", "vietnamese", "sindhi",
+        "uninterpreted",
+    },
+    iso = { [0] =
+        "7-bit ascii",
+        "iso 10646",
+        "iso 8859-1",
+    },
+    windows = { [0] =
+        "symbol",
+        "unicode bmp", -- this is utf16
+        "shiftjis",
+        "prc",
+        "big5",
+        "wansung",
+        "johab",
+        "reserved 7",
+        "reserved 8",
+        "reserved 9",
+        "unicode ucs-4",
+    },
+    custom = {
+        --custom: 0-255 : otf windows nt compatibility mapping
+    }
+}
+
+-- Decoders turn raw name strings into utf8; currently only the windows
+-- "unicode bmp" flavour (utf16 big endian) needs conversion.
+local decoders = {
+    unicode = { },
+    macintosh = { },
+    iso = { },
+    windows = {
+        ["unicode bmp"] = utf16_to_utf8_be
+    },
+    custom = { },
+}
+
+-- This is a bit over the top as we can just look for either windows, unicode or macintosh
+-- names (in that order). A font with no english name is probably a weird one anyway.
+
+local languages = {
+ unicode = {
+ [ 0] = "english",
+ },
+ macintosh = {
+ [ 0] = "english",
+ [ 1] = "french",
+ [ 2] = "german",
+ [ 3] = "italian",
+ [ 4] = "dutch",
+ [ 5] = "swedish",
+ [ 6] = "spanish",
+ [ 7] = "danish",
+ [ 8] = "portuguese",
+ [ 9] = "norwegian",
+ [ 10] = "hebrew",
+ [ 11] = "japanese",
+ [ 12] = "arabic",
+ [ 13] = "finnish",
+ [ 14] = "greek",
+ [ 15] = "icelandic",
+ [ 16] = "maltese",
+ [ 17] = "turkish",
+ [ 18] = "croatian",
+ [ 19] = "chinese (traditional)",
+ [ 20] = "urdu",
+ [ 21] = "hindi",
+ [ 22] = "thai",
+ [ 23] = "korean",
+ [ 24] = "lithuanian",
+ [ 25] = "polish",
+ [ 26] = "hungarian",
+ [ 27] = "estonian",
+ [ 28] = "latvian",
+ [ 29] = "sami",
+ [ 30] = "faroese",
+ [ 31] = "farsi/persian",
+ [ 32] = "russian",
+ [ 33] = "chinese (simplified)",
+ [ 34] = "flemish",
+ [ 35] = "irish gaelic",
+ [ 36] = "albanian",
+ [ 37] = "romanian",
+ [ 38] = "czech",
+ [ 39] = "slovak",
+ [ 40] = "slovenian",
+ [ 41] = "yiddish",
+ [ 42] = "serbian",
+ [ 43] = "macedonian",
+ [ 44] = "bulgarian",
+ [ 45] = "ukrainian",
+ [ 46] = "byelorussian",
+ [ 47] = "uzbek",
+ [ 48] = "kazakh",
+ [ 49] = "azerbaijani (cyrillic script)",
+ [ 50] = "azerbaijani (arabic script)",
+ [ 51] = "armenian",
+ [ 52] = "georgian",
+ [ 53] = "moldavian",
+ [ 54] = "kirghiz",
+ [ 55] = "tajiki",
+ [ 56] = "turkmen",
+ [ 57] = "mongolian (mongolian script)",
+ [ 58] = "mongolian (cyrillic script)",
+ [ 59] = "pashto",
+ [ 60] = "kurdish",
+ [ 61] = "kashmiri",
+ [ 62] = "sindhi",
+ [ 63] = "tibetan",
+ [ 64] = "nepali",
+ [ 65] = "sanskrit",
+ [ 66] = "marathi",
+ [ 67] = "bengali",
+ [ 68] = "assamese",
+ [ 69] = "gujarati",
+ [ 70] = "punjabi",
+ [ 71] = "oriya",
+ [ 72] = "malayalam",
+ [ 73] = "kannada",
+ [ 74] = "tamil",
+ [ 75] = "telugu",
+ [ 76] = "sinhalese",
+ [ 77] = "burmese",
+ [ 78] = "khmer",
+ [ 79] = "lao",
+ [ 80] = "vietnamese",
+ [ 81] = "indonesian",
+ [ 82] = "tagalong",
+ [ 83] = "malay (roman script)",
+ [ 84] = "malay (arabic script)",
+ [ 85] = "amharic",
+ [ 86] = "tigrinya",
+ [ 87] = "galla",
+ [ 88] = "somali",
+ [ 89] = "swahili",
+ [ 90] = "kinyarwanda/ruanda",
+ [ 91] = "rundi",
+ [ 92] = "nyanja/chewa",
+ [ 93] = "malagasy",
+ [ 94] = "esperanto",
+ [128] = "welsh",
+ [129] = "basque",
+ [130] = "catalan",
+ [131] = "latin",
+ [132] = "quenchua",
+ [133] = "guarani",
+ [134] = "aymara",
+ [135] = "tatar",
+ [136] = "uighur",
+ [137] = "dzongkha",
+ [138] = "javanese (roman script)",
+ [139] = "sundanese (roman script)",
+ [140] = "galician",
+ [141] = "afrikaans",
+ [142] = "breton",
+ [143] = "inuktitut",
+ [144] = "scottish gaelic",
+ [145] = "manx gaelic",
+ [146] = "irish gaelic (with dot above)",
+ [147] = "tongan",
+ [148] = "greek (polytonic)",
+ [149] = "greenlandic",
+ [150] = "azerbaijani (roman script)",
+ },
+ iso = {
+ },
+ windows = {
+ [0x0436] = "afrikaans - south africa",
+ [0x041c] = "albanian - albania",
+ [0x0484] = "alsatian - france",
+ [0x045e] = "amharic - ethiopia",
+ [0x1401] = "arabic - algeria",
+ [0x3c01] = "arabic - bahrain",
+ [0x0c01] = "arabic - egypt",
+ [0x0801] = "arabic - iraq",
+ [0x2c01] = "arabic - jordan",
+ [0x3401] = "arabic - kuwait",
+ [0x3001] = "arabic - lebanon",
+ [0x1001] = "arabic - libya",
+ [0x1801] = "arabic - morocco",
+ [0x2001] = "arabic - oman",
+ [0x4001] = "arabic - qatar",
+ [0x0401] = "arabic - saudi arabia",
+ [0x2801] = "arabic - syria",
+ [0x1c01] = "arabic - tunisia",
+ [0x3801] = "arabic - u.a.e.",
+ [0x2401] = "arabic - yemen",
+ [0x042b] = "armenian - armenia",
+ [0x044d] = "assamese - india",
+ [0x082c] = "azeri (cyrillic) - azerbaijan",
+ [0x042c] = "azeri (latin) - azerbaijan",
+ [0x046d] = "bashkir - russia",
+ [0x042d] = "basque - basque",
+ [0x0423] = "belarusian - belarus",
+ [0x0845] = "bengali - bangladesh",
+ [0x0445] = "bengali - india",
+ [0x201a] = "bosnian (cyrillic) - bosnia and herzegovina",
+ [0x141a] = "bosnian (latin) - bosnia and herzegovina",
+ [0x047e] = "breton - france",
+ [0x0402] = "bulgarian - bulgaria",
+ [0x0403] = "catalan - catalan",
+ [0x0c04] = "chinese - hong kong s.a.r.",
+ [0x1404] = "chinese - macao s.a.r.",
+ [0x0804] = "chinese - people's republic of china",
+ [0x1004] = "chinese - singapore",
+ [0x0404] = "chinese - taiwan",
+ [0x0483] = "corsican - france",
+ [0x041a] = "croatian - croatia",
+ [0x101a] = "croatian (latin) - bosnia and herzegovina",
+ [0x0405] = "czech - czech republic",
+ [0x0406] = "danish - denmark",
+ [0x048c] = "dari - afghanistan",
+ [0x0465] = "divehi - maldives",
+ [0x0813] = "dutch - belgium",
+ [0x0413] = "dutch - netherlands",
+ [0x0c09] = "english - australia",
+ [0x2809] = "english - belize",
+ [0x1009] = "english - canada",
+ [0x2409] = "english - caribbean",
+ [0x4009] = "english - india",
+ [0x1809] = "english - ireland",
+ [0x2009] = "english - jamaica",
+ [0x4409] = "english - malaysia",
+ [0x1409] = "english - new zealand",
+ [0x3409] = "english - republic of the philippines",
+ [0x4809] = "english - singapore",
+ [0x1c09] = "english - south africa",
+ [0x2c09] = "english - trinidad and tobago",
+ [0x0809] = "english - united kingdom",
+ [0x0409] = "english - united states",
+ [0x3009] = "english - zimbabwe",
+ [0x0425] = "estonian - estonia",
+ [0x0438] = "faroese - faroe islands",
+ [0x0464] = "filipino - philippines",
+ [0x040b] = "finnish - finland",
+ [0x080c] = "french - belgium",
+ [0x0c0c] = "french - canada",
+ [0x040c] = "french - france",
+ [0x140c] = "french - luxembourg",
+ [0x180c] = "french - principality of monoco",
+ [0x100c] = "french - switzerland",
+ [0x0462] = "frisian - netherlands",
+ [0x0456] = "galician - galician",
+ [0x0437] = "georgian -georgia",
+ [0x0c07] = "german - austria",
+ [0x0407] = "german - germany",
+ [0x1407] = "german - liechtenstein",
+ [0x1007] = "german - luxembourg",
+ [0x0807] = "german - switzerland",
+ [0x0408] = "greek - greece",
+ [0x046f] = "greenlandic - greenland",
+ [0x0447] = "gujarati - india",
+ [0x0468] = "hausa (latin) - nigeria",
+ [0x040d] = "hebrew - israel",
+ [0x0439] = "hindi - india",
+ [0x040e] = "hungarian - hungary",
+ [0x040f] = "icelandic - iceland",
+ [0x0470] = "igbo - nigeria",
+ [0x0421] = "indonesian - indonesia",
+ [0x045d] = "inuktitut - canada",
+ [0x085d] = "inuktitut (latin) - canada",
+ [0x083c] = "irish - ireland",
+ [0x0434] = "isixhosa - south africa",
+ [0x0435] = "isizulu - south africa",
+ [0x0410] = "italian - italy",
+ [0x0810] = "italian - switzerland",
+ [0x0411] = "japanese - japan",
+ [0x044b] = "kannada - india",
+ [0x043f] = "kazakh - kazakhstan",
+ [0x0453] = "khmer - cambodia",
+ [0x0486] = "k'iche - guatemala",
+ [0x0487] = "kinyarwanda - rwanda",
+ [0x0441] = "kiswahili - kenya",
+ [0x0457] = "konkani - india",
+ [0x0412] = "korean - korea",
+ [0x0440] = "kyrgyz - kyrgyzstan",
+ [0x0454] = "lao - lao p.d.r.",
+ [0x0426] = "latvian - latvia",
+ [0x0427] = "lithuanian - lithuania",
+ [0x082e] = "lower sorbian - germany",
+ [0x046e] = "luxembourgish - luxembourg",
+ [0x042f] = "macedonian (fyrom) - former yugoslav republic of macedonia",
+ [0x083e] = "malay - brunei darussalam",
+ [0x043e] = "malay - malaysia",
+ [0x044c] = "malayalam - india",
+ [0x043a] = "maltese - malta",
+ [0x0481] = "maori - new zealand",
+ [0x047a] = "mapudungun - chile",
+ [0x044e] = "marathi - india",
+ [0x047c] = "mohawk - mohawk",
+ [0x0450] = "mongolian (cyrillic) - mongolia",
+ [0x0850] = "mongolian (traditional) - people's republic of china",
+ [0x0461] = "nepali - nepal",
+ [0x0414] = "norwegian (bokmal) - norway",
+ [0x0814] = "norwegian (nynorsk) - norway",
+ [0x0482] = "occitan - france",
+ [0x0448] = "odia (formerly oriya) - india",
+ [0x0463] = "pashto - afghanistan",
+ [0x0415] = "polish - poland",
+ [0x0416] = "portuguese - brazil",
+ [0x0816] = "portuguese - portugal",
+ [0x0446] = "punjabi - india",
+ [0x046b] = "quechua - bolivia",
+ [0x086b] = "quechua - ecuador",
+ [0x0c6b] = "quechua - peru",
+ [0x0418] = "romanian - romania",
+ [0x0417] = "romansh - switzerland",
+ [0x0419] = "russian - russia",
+ [0x243b] = "sami (inari) - finland",
+ [0x103b] = "sami (lule) - norway",
+ [0x143b] = "sami (lule) - sweden",
+ [0x0c3b] = "sami (northern) - finland",
+ [0x043b] = "sami (northern) - norway",
+ [0x083b] = "sami (northern) - sweden",
+ [0x203b] = "sami (skolt) - finland",
+ [0x183b] = "sami (southern) - norway",
+ [0x1c3b] = "sami (southern) - sweden",
+ [0x044f] = "sanskrit - india",
+ [0x1c1a] = "serbian (cyrillic) - bosnia and herzegovina",
+ [0x0c1a] = "serbian (cyrillic) - serbia",
+ [0x181a] = "serbian (latin) - bosnia and herzegovina",
+ [0x081a] = "serbian (latin) - serbia",
+ [0x046c] = "sesotho sa leboa - south africa",
+ [0x0432] = "setswana - south africa",
+ [0x045b] = "sinhala - sri lanka",
+ [0x041b] = "slovak - slovakia",
+ [0x0424] = "slovenian - slovenia",
+ [0x2c0a] = "spanish - argentina",
+ [0x400a] = "spanish - bolivia",
+ [0x340a] = "spanish - chile",
+ [0x240a] = "spanish - colombia",
+ [0x140a] = "spanish - costa rica",
+ [0x1c0a] = "spanish - dominican republic",
+ [0x300a] = "spanish - ecuador",
+ [0x440a] = "spanish - el salvador",
+ [0x100a] = "spanish - guatemala",
+ [0x480a] = "spanish - honduras",
+ [0x080a] = "spanish - mexico",
+ [0x4c0a] = "spanish - nicaragua",
+ [0x180a] = "spanish - panama",
+ [0x3c0a] = "spanish - paraguay",
+ [0x280a] = "spanish - peru",
+ [0x500a] = "spanish - puerto rico",
+ [0x0c0a] = "spanish (modern sort) - spain",
+ [0x040a] = "spanish (traditional sort) - spain",
+ [0x540a] = "spanish - united states",
+ [0x380a] = "spanish - uruguay",
+ [0x200a] = "spanish - venezuela",
+ [0x081d] = "sweden - finland",
+ [0x041d] = "swedish - sweden",
+ [0x045a] = "syriac - syria",
+ [0x0428] = "tajik (cyrillic) - tajikistan",
+ [0x085f] = "tamazight (latin) - algeria",
+ [0x0449] = "tamil - india",
+ [0x0444] = "tatar - russia",
+ [0x044a] = "telugu - india",
+ [0x041e] = "thai - thailand",
+ [0x0451] = "tibetan - prc",
+ [0x041f] = "turkish - turkey",
+ [0x0442] = "turkmen - turkmenistan",
+ [0x0480] = "uighur - prc",
+ [0x0422] = "ukrainian - ukraine",
+ [0x042e] = "upper sorbian - germany",
+ [0x0420] = "urdu - islamic republic of pakistan",
+ [0x0843] = "uzbek (cyrillic) - uzbekistan",
+ [0x0443] = "uzbek (latin) - uzbekistan",
+ [0x042a] = "vietnamese - vietnam",
+ [0x0452] = "welsh - united kingdom",
+ [0x0488] = "wolof - senegal",
+ [0x0485] = "yakut - russia",
+ [0x0478] = "yi - prc",
+ [0x046a] = "yoruba - nigeria",
+ },
+ custom = {
+ },
+}
+
+local standardromanencoding = { [0] = -- hijacked from wikipedia
+ "notdef", ".null", "nonmarkingreturn", "space", "exclam", "quotedbl",
+ "numbersign", "dollar", "percent", "ampersand", "quotesingle", "parenleft",
+ "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash",
+ "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
+ "nine", "colon", "semicolon", "less", "equal", "greater", "question", "at",
+ "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
+ "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft",
+ "backslash", "bracketright", "asciicircum", "underscore", "grave", "a", "b",
+ "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q",
+ "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft", "bar",
+ "braceright", "asciitilde", "Adieresis", "Aring", "Ccedilla", "Eacute",
+ "Ntilde", "Odieresis", "Udieresis", "aacute", "agrave", "acircumflex",
+ "adieresis", "atilde", "aring", "ccedilla", "eacute", "egrave",
+ "ecircumflex", "edieresis", "iacute", "igrave", "icircumflex", "idieresis",
+ "ntilde", "oacute", "ograve", "ocircumflex", "odieresis", "otilde", "uacute",
+ "ugrave", "ucircumflex", "udieresis", "dagger", "degree", "cent", "sterling",
+ "section", "bullet", "paragraph", "germandbls", "registered", "copyright",
+ "trademark", "acute", "dieresis", "notequal", "AE", "Oslash", "infinity",
+ "plusminus", "lessequal", "greaterequal", "yen", "mu", "partialdiff",
+ "summation", "product", "pi", "integral", "ordfeminine", "ordmasculine",
+ "Omega", "ae", "oslash", "questiondown", "exclamdown", "logicalnot",
+ "radical", "florin", "approxequal", "Delta", "guillemotleft",
+ "guillemotright", "ellipsis", "nonbreakingspace", "Agrave", "Atilde",
+ "Otilde", "OE", "oe", "endash", "emdash", "quotedblleft", "quotedblright",
+ "quoteleft", "quoteright", "divide", "lozenge", "ydieresis", "Ydieresis",
+ "fraction", "currency", "guilsinglleft", "guilsinglright", "fi", "fl",
+ "daggerdbl", "periodcentered", "quotesinglbase", "quotedblbase",
+ "perthousand", "Acircumflex", "Ecircumflex", "Aacute", "Edieresis", "Egrave",
+ "Iacute", "Icircumflex", "Idieresis", "Igrave", "Oacute", "Ocircumflex",
+ "apple", "Ograve", "Uacute", "Ucircumflex", "Ugrave", "dotlessi",
+ "circumflex", "tilde", "macron", "breve", "dotaccent", "ring", "cedilla",
+ "hungarumlaut", "ogonek", "caron", "Lslash", "lslash", "Scaron", "scaron",
+ "Zcaron", "zcaron", "brokenbar", "Eth", "eth", "Yacute", "yacute", "Thorn",
+ "thorn", "minus", "multiply", "onesuperior", "twosuperior", "threesuperior",
+ "onehalf", "onequarter", "threequarters", "franc", "Gbreve", "gbreve",
+ "Idotaccent", "Scedilla", "scedilla", "Cacute", "cacute", "Ccaron", "ccaron",
+ "dcroat",
+}
+
+-- Symbolic names for the os/2 usWeightClass values (multiples of 100).
+local weights = {
+    [100] = "thin",
+    [200] = "extralight",
+    [300] = "light",
+    [400] = "normal",
+    [500] = "medium",
+    [600] = "semibold",
+    [700] = "bold",
+    [800] = "extrabold",
+    [900] = "black",
+}
+
+-- Symbolic names for the os/2 usWidthClass values (1-9).
+local widths = {
+    [1] = "ultracondensed",
+    [2] = "extracondensed",
+    [3] = "condensed",
+    [4] = "semicondensed",
+    [5] = "normal",
+    [6] = "semiexpanded",
+    [7] = "expanded",
+    [8] = "extraexpanded",
+    [9] = "ultraexpanded",
+}
+
+-- Unknown weight classes snap to the nearest multiple of 100; anything
+-- rounding above 900 counts as black, anything else unknown as normal.
+setmetatableindex(weights, function(t,k)
+    local rounded = floor((k + 50) / 100) * 100
+    if rounded > 900 then
+        return "black"
+    end
+    return rawget(t,rounded) or "normal"
+end)
+
+-- Any width class not listed above is simply treated as normal.
+setmetatableindex(widths,function(t,k)
+    return "normal"
+end)
+
+-- Symbolic names for panose byte 3 (weight) values.
+local panoseweights = {
+    [ 0] = "normal",
+    [ 1] = "normal",
+    [ 2] = "verylight",
+    [ 3] = "light",
+    [ 4] = "thin",
+    [ 5] = "book",
+    [ 6] = "medium",
+    [ 7] = "demi",
+    [ 8] = "bold",
+    [ 9] = "heavy",
+    [10] = "black",
+}
+
+-- Symbolic names for panose byte 4 (proportion/width) values.
+local panosewidths = {
+    [ 0] = "normal",
+    [ 1] = "normal",
+    [ 2] = "normal",
+    [ 3] = "normal",
+    [ 4] = "normal",
+    [ 5] = "expanded",
+    [ 6] = "condensed",
+    [ 7] = "veryexpanded",
+    [ 8] = "verycondensed",
+    [ 9] = "monospaced",
+}
+
+-- We implement a reader per table.
+
+-- The name table is probably the first one to load. After all this one provides
+-- useful information about what we deal with. The complication is that we need
+-- to filter the best one available.
+
+-- The 'name' table: textual metadata (family, version, copyright, ...).
+-- Entries come in several platform/encoding/language flavours; we collect
+-- the candidates per platform and then pick the best entry per name tag,
+-- preferring english windows unicode names.
+function readers.name(f,fontdata)
+    local datatable = fontdata.tables.name
+    if datatable then
+        f:seek("set",datatable.offset)
+        local format = readushort(f)
+        local nofnames = readushort(f)
+        local offset = readushort(f)
+        -- we can also provide a raw list as extra, todo as option
+        local namelists = {
+            unicode = { },
+            windows = { },
+            macintosh = { },
+            -- iso = { },
+            -- windows = { },
+        }
+        -- First pass: collect all name records we know how to interpret.
+        -- Each record is six ushorts; when a field is unknown the rest of
+        -- the record is skipped (hence the skipshort bookkeeping).
+        for i=1,nofnames do
+            local platform = platforms[readushort(f)]
+            if platform then
+                local namelist = namelists[platform]
+                if namelist then
+                    local encoding = readushort(f)
+                    local language = readushort(f)
+                    local encodings = encodings[platform]
+                    local languages = languages[platform]
+                    if encodings and languages then
+                        local encoding = encodings[encoding]
+                        local language = languages[language]
+                        if encoding and language then
+                            local name = reservednames[readushort(f)]
+                            if name then
+                                namelist[#namelist+1] = {
+                                    platform = platform,
+                                    encoding = encoding,
+                                    language = language,
+                                    name = name,
+                                    length = readushort(f),
+                                    offset = readushort(f),
+                                }
+                            else
+                                skipshort(f,2)
+                            end
+                        else
+                            skipshort(f,3)
+                        end
+                    else
+                        skipshort(f,3)
+                    end
+                else
+                    skipshort(f,5)
+                end
+            else
+                skipshort(f,5)
+            end
+        end
+        -- if format == 1 then
+        --     local noftags = readushort(f)
+        --     for i=1,noftags do
+        --         local length = readushort(f)
+        --         local offset = readushort(f)
+        --     end
+        -- end
+        --
+        -- we need to choose one we like, for instance an unicode one
+        --
+        local start = datatable.offset + offset
+        local names = { }
+        local done = { }
+        --
+        -- there is quite some logic in ff ... hard to follow so we start simple
+        -- and extend when we run into it (todo: proper reverse hash) .. we're only
+        -- interested in english anyway
+        --
+        -- Second pass helper: copy the first matching record per name tag
+        -- into 'names', decoding the payload when a decoder is available.
+        -- A nil encoding/language argument acts as a wildcard.
+        local function filter(platform,e,l)
+            local namelist = namelists[platform]
+            for i=1,#namelist do
+                local name = namelist[i]
+                local nametag = name.name
+                if not done[nametag] then
+                    local encoding = name.encoding
+                    local language = name.language
+                    if (not e or encoding == e) and (not l or language == l) then
+                        f:seek("set",start+name.offset)
+                        local content = readstring(f,name.length)
+                        local decoder = decoders[platform]
+                        if decoder then
+                            decoder = decoder[encoding]
+                        end
+                        if decoder then
+                            content = decoder(content)
+                        end
+                        names[nametag] = {
+                            content = content,
+                            platform = platform,
+                            encoding = encoding,
+                            language = language,
+                        }
+                        done[nametag] = true
+                    end
+                end
+            end
+        end
+        --
+        -- Preference order: english windows unicode first, then english
+        -- mac roman, then anything from these platforms.
+        filter("windows","unicode bmp","english - united states")
+        -- filter("unicode") -- which one ?
+        filter("macintosh","roman","english")
+        filter("windows")
+        filter("macintosh")
+        filter("unicode")
+        --
+        fontdata.names = names
+    else
+        fontdata.names = { }
+    end
+end
+
+-- This table is an original windows (with its precursor os/2) table. In ff this one is
+-- part of the pfminfo table but here we keep it separate (for now). We will create a
+-- properties table afterwards.
+
+readers["os/2"] = function(f,fontdata)
+ local datatable = fontdata.tables["os/2"]
+ if datatable then
+ f:seek("set",datatable.offset)
+ local version = readushort(f)
+ local windowsmetrics = {
+ version = version,
+ averagewidth = readushort(f),
+ weightclass = readushort(f),
+ widthclass = readushort(f),
+ fstype = readushort(f),
+ subscriptxsize = readushort(f),
+ subscriptysize = readushort(f),
+ subscriptxoffset = readushort(f),
+ subscriptyoffset = readushort(f),
+ superscriptxsize = readushort(f),
+ superscriptysize = readushort(f),
+ superscriptxoffset = readushort(f),
+ superscriptyoffset = readushort(f),
+ strikeoutsize = readushort(f),
+ strikeoutpos = readushort(f),
+ familyclass = readushort(f),
+ panose = { readbytes(f,10) },
+ unicoderanges = { readulong(f), readulong(f), readulong(f), readulong(f) },
+ vendor = readstring(f,4),
+ fsselection = readushort(f),
+ firstcharindex = readushort(f),
+ lastcharindex = readushort(f),
+ typoascender = readushort(f),
+ typodescender = readushort(f),
+ typolinegap = readushort(f),
+ winascent = readushort(f),
+ windescent = readushort(f),
+ }
+ if version >= 1 then
+ windowsmetrics.codepageranges = { readulong(f), readulong(f) }
+ end
+ if version >= 3 then
+ windowsmetrics.xheight = readshort(f)
+ windowsmetrics.capheight = readshort(f)
+ windowsmetrics.defaultchar = readushort(f)
+ windowsmetrics.breakchar = readushort(f)
+ -- windowsmetrics.maxcontexts = readushort(f)
+ -- windowsmetrics.loweropticalpointsize = readushort(f)
+ -- windowsmetrics.upperopticalpointsize = readushort(f)
+ end
+ --
+ -- todo: unicoderanges
+ --
+ windowsmetrics.weight = windowsmetrics.weightclass and weights[windowsmetrics.weightclass]
+ windowsmetrics.width = windowsmetrics.widthclass and widths [windowsmetrics.widthclass]
+ --
+ windowsmetrics.panoseweight = panoseweights[windowsmetrics.panose[3]]
+ windowsmetrics.panosewidth = panosewidths [windowsmetrics.panose[4]]
+ --
+ fontdata.windowsmetrics = windowsmetrics
+ else
+ fontdata.windowsmetrics = { }
+ end
+end
+
+-- The 'head' table: global font header. The fields are read strictly in
+-- file order, one after another, into a freshly created table.
+readers.head = function(f,fontdata)
+    local datatable = fontdata.tables.head
+    if not datatable then
+        fontdata.fontheader = { }
+        fontdata.nofglyphs = 0
+        return
+    end
+    f:seek("set",datatable.offset)
+    local fontheader = { }
+    fontheader.version = readfixed(f)
+    fontheader.revision = readfixed(f)
+    fontheader.checksum = readulong(f)
+    fontheader.magic = readulong(f)
+    fontheader.flags = readushort(f)
+    fontheader.units = readushort(f)
+    fontheader.created = readlongdatetime(f)
+    fontheader.modified = readlongdatetime(f)
+    fontheader.xmin = readshort(f)
+    fontheader.ymin = readshort(f)
+    fontheader.xmax = readshort(f)
+    fontheader.ymax = readshort(f)
+    fontheader.macstyle = readushort(f)
+    fontheader.smallpixels = readushort(f)
+    fontheader.directionhint = readshort(f)
+    fontheader.indextolocformat = readshort(f)
+    fontheader.glyphformat = readshort(f)
+    fontdata.fontheader = fontheader
+    fontdata.nofglyphs = 0
+end
+
+-- This table is a rather simple one. No treatment of values is needed here. Most
+-- variables are not used but nofhmetrics is quite important.
+
+-- The 'hhea' table is a rather simple one. No treatment of values is
+-- needed here. Most variables are not used but nofhmetrics is quite
+-- important: it drives the hmtx reader below.
+readers.hhea = function(f,fontdata,specification)
+    if specification.details then
+        local datatable = fontdata.tables.hhea
+        if datatable then
+            f:seek("set",datatable.offset)
+            fontdata.horizontalheader = {
+                version = readfixed(f),
+                ascender = readfword(f),
+                descender = readfword(f),
+                linegap = readfword(f),
+                maxadvancewidth = readufword(f),
+                minleftsidebearing = readfword(f),
+                minrightsidebearing = readfword(f),
+                maxextent = readfword(f),
+                caretsloperise = readshort(f),
+                caretsloperun = readshort(f),
+                caretoffset = readshort(f),
+                reserved_1 = readshort(f),
+                reserved_2 = readshort(f),
+                reserved_3 = readshort(f),
+                reserved_4 = readshort(f),
+                metricdataformat = readshort(f),
+                nofhmetrics = readushort(f),
+            }
+        else
+            -- no hhea table: provide a sane default so hmtx can cope
+            fontdata.horizontalheader = {
+                nofhmetrics = 0,
+            }
+        end
+    end
+end
+
+-- We probably never need all these variables, but we do need the nofglyphs
+-- when loading other tables. Again we use the microsoft names but see no reason
+-- to have "max" in each name.
+
+-- fontdata.maximumprofile can be bad
+
+-- The 'maxp' table: we mostly need nofglyphs when loading other tables;
+-- the remaining (truetype only) fields are kept for completeness. We use
+-- the microsoft names minus the "max" prefixes.
+readers.maxp = function(f,fontdata,specification)
+    if specification.details then
+        local datatable = fontdata.tables.maxp
+        -- Declared here so the fallback at the end can see it; the
+        -- original referenced an out-of-scope local (always nil) there.
+        local version = nil
+        if datatable then
+            f:seek("set",datatable.offset)
+            version = readfixed(f)
+            if version == 0.5 then
+                -- cff (postscript) flavour: only the glyph count
+                fontdata.maximumprofile = {
+                    version = version,
+                    nofglyphs = readushort(f),
+                }
+                return
+            elseif version == 1.0 then
+                -- truetype flavour: the full profile
+                fontdata.maximumprofile = {
+                    version = version,
+                    nofglyphs = readushort(f),
+                    points = readushort(f),
+                    contours = readushort(f),
+                    compositepoints = readushort(f),
+                    compositecontours = readushort(f),
+                    zones = readushort(f),
+                    twilightpoints = readushort(f),
+                    storage = readushort(f),
+                    functiondefs = readushort(f),
+                    instructiondefs = readushort(f),
+                    stackelements = readushort(f),
+                    sizeofinstructions = readushort(f),
+                    componentelements = readushort(f),
+                    componentdepth = readushort(f),
+                }
+                return
+            end
+        end
+        -- missing table or unknown version
+        fontdata.maximumprofile = {
+            version = version,
+            nofglyphs = 0,
+        }
+    end
+end
+
+-- Here we filter the (advance) widths (that can be different from the boundingbox
+-- width of course).
+
+-- The 'hmtx' table: (advance) widths and left side bearings (which can
+-- differ from the boundingbox width of course). The last entry's values
+-- repeat for all remaining glyphs, as happens in monospaced or cjk fonts
+-- with fixed widths.
+readers.hmtx = function(f,fontdata,specification)
+    if specification.glyphs then
+        local datatable = fontdata.tables.hmtx
+        if datatable then
+            f:seek("set",datatable.offset)
+            local nofmetrics = fontdata.horizontalheader.nofhmetrics
+            local glyphs = fontdata.glyphs
+            local nofglyphs = fontdata.nofglyphs
+            local width = 0 -- advance
+            local leftsidebearing = 0
+            -- NOTE(review): the spec declares advanceWidth as uint16 while
+            -- we read it with readshort -- confirm if very wide glyphs matter.
+            for i=0,nofmetrics-1 do
+                local glyph = glyphs[i]
+                width = readshort(f)
+                leftsidebearing = readshort(f)
+                -- zero is the default, so only store nonzero values; the
+                -- original tested the undefined global 'advance' here,
+                -- which made the condition always true
+                if width ~= 0 then
+                    glyph.width = width
+                end
+                if leftsidebearing ~= 0 then
+                    glyph.lsb = leftsidebearing
+                end
+            end
+            -- The remaining glyphs inherit the last width/lsb pair. The
+            -- original looped to nofglyphs-nofmetrics, a bound that made
+            -- the loop never run; it must go up to nofglyphs-1.
+            for i=nofmetrics,nofglyphs-1 do
+                local glyph = glyphs[i]
+                if width ~= 0 then
+                    glyph.width = width
+                end
+                if leftsidebearing ~= 0 then
+                    glyph.lsb = leftsidebearing
+                end
+            end
+        end
+    end
+end
+
+-- The post table relates to postscript (printing) but has some relevant
+-- properties for other usage as well. We just use the names from the microsoft
+-- specification. The version 2.0 description is somewhat fuzzy but it is a
+-- hybrid with overloads.
+
+-- The 'post' table relates to postscript (printing) but has some relevant
+-- properties for other usage as well (notably glyph names in version 2.0).
+-- We just use the names from the microsoft specification. The version 2.0
+-- description is somewhat fuzzy but it is a hybrid with overloads.
+readers.post = function(f,fontdata,specification)
+    local datatable = fontdata.tables.post
+    if datatable then
+        f:seek("set",datatable.offset)
+        local version = readfixed(f)
+        fontdata.postscript = {
+            version = version,
+            italicangle = round(1000*readfixed(f))/1000,
+            underlineposition = readfword(f),
+            underlinethickness = readfword(f),
+            monospaced = readulong(f),
+            minmemtype42 = readulong(f),
+            maxmemtype42 = readulong(f),
+            minmemtype1 = readulong(f),
+            maxmemtype1 = readulong(f),
+        }
+        if not specification.glyphs then
+            -- enough done
+        elseif version == 1.0 then
+            -- mac encoding (258 glyphs); the original code accessed an
+            -- undefined global 'glyphs' here
+            local glyphs = fontdata.glyphs
+            for index=0,#standardromanencoding do
+                glyphs[index].name = standardromanencoding[index]
+            end
+        elseif version == 2.0 then
+            local glyphs = fontdata.glyphs
+            local nofglyphs = readushort(f)
+            local indices = { }
+            local maxnames = 0
+            -- First the index array: entries below 258 refer to the mac
+            -- standard names, higher ones index the pascal strings that
+            -- follow (1 based after subtracting 257).
+            for i=0,nofglyphs-1 do
+                local nameindex = readushort(f)
+                if nameindex >= 258 then
+                    maxnames = maxnames + 1
+                    nameindex = nameindex - 257
+                    indices[nameindex] = i
+                else
+                    glyphs[i].name = standardromanencoding[nameindex]
+                end
+            end
+            -- Then the length-prefixed name strings themselves; a zero
+            -- length signals a broken table so we bail out.
+            for i=1,maxnames do
+                local length = readbyte(f)
+                if length > 0 then
+                    glyphs[indices[i]].name = readstring(f,length)
+                else
+                    report("quit post name fetching at %a of %a",i,maxnames)
+                    break
+                end
+            end
+        elseif version == 2.5 then
+            -- deprecated, will be done when needed
+        elseif version == 3.0 then
+            -- no ps name information
+        end
+    else
+        fontdata.postscript = { }
+    end
+end
+
-- Placeholder: cff outlines are dealt with in a separate module.

readers.cff = function(f,fontdata,specification)
    if not specification.glyphs then
        return
    end
    reportskippedtable("cff")
end
+
+-- Not all cmaps make sense .. e.g. dfont is obsolete and probably more are not
+-- relevant. Let's see what we run into. There is some weird calculation going
+-- on here because we offset in a table being a blob of memory or file.
+
local formatreaders = { }

-- Format 4: segment mapping to delta values, the common bmp cmap. Per the
-- microsoft specification: when idRangeOffset is zero the glyph index is
-- (char + idDelta) mod 65536; otherwise the index comes from glyphIdArray
-- and idDelta is added to that index (not to the character code).

formatreaders[4] = function(f,fontdata,offset)
    f:seek("set",offset+2) -- skip format
    --
    local length      = readushort(f) -- in bytes of subtable
    local language    = readushort(f)
    local nofsegments = readushort(f) / 2
    --
    skipshort(f,3) -- searchrange entryselector rangeshift
    --
    local endchars   = { }
    local startchars = { }
    local deltas     = { }
    local offsets    = { }
    local indices    = { }
    local mapmap     = fontdata.map.map
    local glyphs     = fontdata.glyphs
    --
    for i=1,nofsegments do
        endchars[i] = readushort(f)
    end
    local reserved = readushort(f) -- padding, always 0
    for i=1,nofsegments do
        startchars[i] = readushort(f)
    end
    for i=1,nofsegments do
        deltas[i] = readshort(f)
    end
    for i=1,nofsegments do
        offsets[i] = readushort(f)
    end
    -- format length language nofsegments searchrange entryselector rangeshift 4-tables
    local size = (length - 2 * 2 - 5 * 2 - 4 * nofsegments * 2) / 2
    for i=1,size-1 do
        indices[i] = readushort(f)
    end
    --
    for segment=1,nofsegments do
        local startchar = startchars[segment]
        local endchar   = endchars[segment]
        local offset    = offsets[segment]
        local delta     = deltas[segment]
        if startchar == 0xFFFF and endchar == 0xFFFF then
            -- the sentinel segment
            break
        elseif offset == 0 then
            for char=startchar,endchar do
                local index = mod(char + delta,65536)
                if index > 0 then
                    local glyph = glyphs[index]
                    if not glyph.unicode then
                        glyph.unicode = char
                    end
                    -- unicode -> index, consistent with the format 6 and 12
                    -- readers (this one had the pair swapped)
                    mapmap[char] = index
                end
            end
        else
            -- the offset is relative to its own position in the offsets
            -- array, which the shift below compensates for
            local shift = (segment-nofsegments+offset/2) - startchar
            for char=startchar,endchar do
                local slot  = shift + char
                local index = indices[slot]
                if index and index > 0 then
                    -- delta applies to the fetched glyph index, per spec
                    index = mod(index + delta,65536)
                    if index > 0 then
                        local glyph = glyphs[index]
                        if not glyph.unicode then
                            glyph.unicode = char
                        end
                        mapmap[char] = index
                    end
                end
            end
        end
    end

end
+
-- Format 6: trimmed table mapping, a contiguous range of characters mapped
-- through a plain index array.

formatreaders[6] = function(f,fontdata,offset)
    f:seek("set",offset+6) -- skip the format, length and language words
    local mapmap = fontdata.map.map
    local glyphs = fontdata.glyphs
    local first  = readushort(f)
    local count  = readushort(f)
    local last   = first + count - 1
    for unicode=first,last do
        local index = readushort(f)
        if index > 0 then
            local glyph = glyphs[index]
            if not glyph.unicode then
                glyph.unicode = unicode
            end
            mapmap[unicode] = index
        end
    end
end
+
-- Format 12: segmented coverage of the full unicode range, stored as groups
-- of consecutive characters mapped onto consecutive glyph indices.

formatreaders[12] = function(f,fontdata,offset)
    f:seek("set",offset+12) -- skip format, reserved, length and language
    local mapmap    = fontdata.map.map
    local glyphs    = fontdata.glyphs
    local nofgroups = readulong(f)
    for i=1,nofgroups do
        local first = readulong(f)
        local last  = readulong(f)
        local index = readulong(f)
        local unicode = first
        while unicode <= last do
            local glyph = glyphs[index]
            if not glyph.unicode then
                glyph.unicode = unicode
            end
            mapmap[unicode] = index
            unicode = unicode + 1
            index   = index   + 1
        end
    end
end
+
-- Process the cmap subtable for the given platform, encoding and format when
-- it is present and we have a reader for that format. Returns true when a
-- subtable was actually processed.

local function checkcmap(f,fontdata,records,platform,encoding,format)
    local record   = records[platform]
    local formats  = record and record[encoding]
    local position = formats and formats[format]
    if not position then
        return
    end
    local reader = formatreaders[format]
    if not reader then
        return
    end
    -- report("checking cmap: platform %a, encoding %a, format %a",platform,encoding,format)
    reader(f,fontdata,position)
    return true
end
+
-- Read the cmap table: first collect every subtable record (platform,
-- encoding, offset), then resolve the format id stored at each offset, and
-- finally process the preferred subtables (windows unicode bmp format 4 and
-- full unicode format 12).

function readers.cmap(f,fontdata,specification)
    if specification.glyphs then
        local datatable = fontdata.tables.cmap
        if datatable then
            local tableoffset = datatable.offset
            f:seek("set",tableoffset)
            local version = readushort(f)
            local noftables = readushort(f)
            local records = { }
            local unicodecid = false -- NOTE(review): never assigned below, placeholder
            local variantcid = false
            -- pass one: group the subtable offsets by platform and encoding
            for i=1,noftables do
                local platform = readushort(f)
                local encoding = readushort(f)
                local offset = readulong(f)
                local record = records[platform]
                if not record then
                    records[platform] = {
                        [encoding] = {
                            offsets = { offset },
                            formats = { },
                        }
                    }
                else
                    local subtables = record[encoding]
                    if not subtables then
                        record[encoding] = {
                            offsets = { offset },
                            formats = { },
                        }
                    else
                        local offsets = subtables.offsets
                        offsets[#offsets+1] = offset
                    end
                end
            end
            -- pass two: peek at each subtable to learn its format, and
            -- replace the per-encoding record by a format -> absolute offset
            -- map (offsets in the directory are relative to the cmap table)
            for platform, record in next, records do
                for encoding, subtables in next, record do
                    local offsets = subtables.offsets
                    local formats = subtables.formats
                    for i=1,#offsets do
                        local offset = tableoffset + offsets[i]
                        f:seek("set",offset)
                        formats[readushort(f)] = offset
                    end
                    record[encoding] = formats
                end
            end
            -- process windows unicode bmp (3,1,4) and full repertoire (3,10,12)
            checkcmap(f,fontdata,records,3, 1, 4)
            checkcmap(f,fontdata,records,3,10,12)
            -- checkcmap(f,fontdata,records,0, 3, 4)
            -- checkcmap(f,fontdata,records,1, 0, 6)
            -- todo
            -- NOTE(review): presumably (0,5) holds variation sequences
            -- (format 14); the value is computed but not yet used -- confirm
            variantcid = records[0] and records[0][5]
            --
            fontdata.cidmaps = {
                version = version,
                noftables = noftables,
                records = records,
            }
        else
            fontdata.cidmaps = { }
        end
    end
end
+
-- The glyf table depends on the loca table. We have one entry too many
-- in the locations table (the last one is a dummy) because we need to
-- calculate the size of a glyph blob from the delta, although we do not
-- need it in our usage (yet). We can remove the locations table when
-- we're done (todo: cleanup finalizer).
+
-- Placeholder: the loca table will be handled together with glyf.

function readers.loca(f,fontdata,specification)
    if not specification.glyphs then
        return
    end
    reportskippedtable("loca")
end
+
-- Placeholder: glyf handling partly goes to the cff module.

function readers.glyf(f,fontdata,specification)
    if not specification.glyphs then
        return
    end
    reportskippedtable("glyf")
end
+
+-- Here we have a table that we really need for later processing although a more
+-- advanced gpos table can also be available. Todo: we need a 'fake' lookup for
+-- this (analogue to ff).
+
-- Read the (truetype) kern table and store format 0 pairs per glyph in
-- glyph.kerns[right] = value. Other subtable formats are reported and
-- skipped. Todo: a 'fake' lookup for this (analogue to ff).

function readers.kern(f,fontdata,specification)
    if specification.kerns then
        local datatable = fontdata.tables.kern
        if datatable then
            f:seek("set",datatable.offset)
            local version   = readushort(f)
            local noftables = readushort(f)
            for i=1,noftables do
                local position   = f:seek() -- start of this subtable
                local subversion = readushort(f)
                local length     = readushort(f) -- of the whole subtable
                local coverage   = readushort(f)
                -- bits 8-15 of the coverage field hold the format (0 or 2)
                local format = bit32.rshift(coverage,8)
                if format == 0 then
                    local nofpairs = readushort(f)
                    skipshort(f,3) -- searchrange entryselector rangeshift
                    local glyphs = fontdata.glyphs
                    for i=1,nofpairs do
                        local left  = readushort(f)
                        local right = readushort(f)
                        local kern  = readfword(f)
                        local glyph = glyphs[left]
                        local kerns = glyph.kerns
                        if kerns then
                            kerns[right] = kern
                        else
                            glyph.kerns = { [right] = kern }
                        end
                    end
                elseif format == 2 then
                    report("todo: kern classes")
                    -- skip the unhandled subtable so a next one is read from
                    -- the right position
                    f:seek("set",position+length)
                else
                    report("todo: kerns")
                    f:seek("set",position+length)
                end
            end
        end
    end
end
+
-- Placeholder: gdef parsing still has to be implemented.

function readers.gdef(f,fontdata,specification)
    if not specification.details then
        return
    end
    reportskippedtable("gdef")
end
+
-- Placeholder: gsub parsing still has to be implemented.

function readers.gsub(f,fontdata,specification)
    if not specification.details then
        return
    end
    reportskippedtable("gsub")
end
+
-- Placeholder: gpos parsing still has to be implemented.

function readers.gpos(f,fontdata,specification)
    if not specification.details then
        return
    end
    reportskippedtable("gpos")
end
+
-- The math table: for now we only read the three leading offsets; the real
-- parsing is still to be done.

function readers.math(f,fontdata,specification)
    if not specification.glyphs then
        return
    end
    local datatable = fontdata.tables.math
    if not datatable then
        return
    end
    f:seek("set",datatable.offset)
    local scriptlist  = readulong(f)
    local featurelist = readulong(f)
    local lookuplist  = readulong(f)
    -- todo
end
+
+-- Goodie. A sequence instead of segments costs a bit more memory, some 300K on a
+-- dejavu serif and about the same on a pagella regular.
+
-- Pack outline data, recursing into subfonts of a collection. When
-- makesequence is set, each glyph's list of segments is flattened into one
-- sequence (operator first, then its operands). Otherwise segments that
-- occur more than once are moved into a shared data.segments pool and
-- referenced by index (a sequence costs a bit more memory, some 300K on a
-- dejavu serif and about the same on a pagella regular).

local function packoutlines(data,makesequence)
    local subfonts = data.subfonts
    if subfonts then
        -- a collection: pack each subfont on its own
        for i=1,#subfonts do
            packoutlines(subfonts[i],makesequence)
        end
        return
    end
    if data.segments then
        return -- already packed
    end
    local glyphs = data.glyphs
    if not glyphs then
        return
    end
    if makesequence then
        for index=1,#glyphs do
            local glyph    = glyphs[index]
            local segments = glyph.segments
            if segments then
                local sequence = { }
                local n        = 0
                for i=1,#segments do
                    local segment = segments[i]
                    local size    = #segment
                    -- the operator is stored last in a segment but comes
                    -- first in a sequence
                    n = n + 1
                    sequence[n] = segment[size]
                    for j=1,size-1 do
                        n = n + 1
                        sequence[n] = segment[j]
                    end
                end
                glyph.sequence = sequence
                glyph.segments = nil
            end
        end
    else
        local seen    = { }
        local pool    = { }
        local slots   = { }
        local nofpool = 0
        -- first pass: count identical segments
        for index=1,#glyphs do
            local segments = glyphs[index].segments
            if segments then
                for i=1,#segments do
                    local key = table.concat(segments[i]," ")
                    seen[key] = (seen[key] or 0) + 1
                end
            end
        end
        -- second pass: replace duplicates by an index into the shared pool
        for index=1,#glyphs do
            local segments = glyphs[index].segments
            if segments then
                for i=1,#segments do
                    local segment = segments[i]
                    local key     = table.concat(segment," ")
                    if seen[key] > 1 then
                        local slot = slots[key]
                        if not slot then
                            nofpool    = nofpool + 1
                            slot       = nofpool
                            slots[key] = slot
                            pool[slot] = segment
                        end
                        segments[i] = slot
                    end
                end
            end
        end
        if nofpool > 0 then
            data.segments = pool
        end
    end
end
+
-- Undo packoutlines' sharing: replace pool indices in glyph segments by the
-- shared segment tables and drop the pool. Recurses into subfonts.

local function unpackoutlines(data)
    local subfonts = data.subfonts
    if subfonts then
        for i=1,#subfonts do
            unpackoutlines(subfonts[i])
        end
        return
    end
    local pool = data.segments
    if not pool then
        return -- nothing was packed
    end
    local glyphs = data.glyphs
    if not glyphs then
        return
    end
    for index=1,#glyphs do
        local segments = glyphs[index].segments
        if segments then
            for i=1,#segments do
                -- an entry is either a pool index or an unshared segment
                local shared = pool[segments[i]]
                if shared then
                    segments[i] = shared
                end
            end
        end
    end
    data.segments = nil
end
+
-- exported so that cached shape data can be (un)packed elsewhere as well
otf.packoutlines = packoutlines
otf.unpackoutlines = unpackoutlines
+
+-- Now comes the loader. The order of reading these matters as we need to know
+-- some properties in order to read following tables. When details is true we also
+-- initialize the glyphs data.
+
+-- options:
+--
+-- properties : common metrics, names, list of features
+-- glyphs : metrics, encoding
+-- shapes : sequences or segments
+-- kerns : global (ttf) kerns
+-- lookups : gsub and gpos lookups
+
-- Read one font starting at its sfnt directory. The order of the reader
-- calls below matters: later tables need properties gathered by earlier ones
-- (hmtx uses the hhea/maxp counts, cmap fills the glyphs created on demand,
-- and so on).

local function readdata(f,offset,specification)
    if offset then
        -- nil offset means we are already positioned (single font at 0)
        f:seek("set",offset)
    end
    local tables = { }
    local basename = file.basename(specification.filename)
    local filesize = specification.filesize
    local fontdata = { -- some can/will go
        filename = basename,
        filesize = filesize,
        version = readstring(f,4),
        noftables = readushort(f),
        searchrange = readushort(f), -- not needed
        entryselector = readushort(f), -- not needed
        rangeshift = readushort(f), -- not needed
        tables = tables,
    }
    -- the table directory: tag -> { checksum, offset, length }
    for i=1,fontdata.noftables do
        local tag = lower(stripstring(readstring(f,4)))
        local checksum = readulong(f) -- not used
        local offset = readulong(f)
        local length = readulong(f)
        if offset + length > filesize then
            -- we report but continue: the bad table might be one we never
            -- consult anyway
            report("bad %a table in file %a",tag,basename)
        end
        tables[tag] = {
            checksum = checksum,
            offset = offset,
            length = length,
        }
    end
    if specification.glyphs then
        -- glyph records are created on first access so the readers can fill
        -- them in any order
        local glyphs = setmetatableindex(function(t,k)
            local v = {
                -- maybe more defaults
                index = k,
            }
            t[k] = v
            return v
        end)
        local map = {
            map = { }, -- unicode <-> index mappings, filled by the cmap reader
            backmap = { },
        }
        fontdata.glyphs = glyphs
        fontdata.map = map
    end
    readers["name"](f,fontdata,specification)
    readers["os/2"](f,fontdata,specification)
    readers["head"](f,fontdata,specification)
    readers["maxp"](f,fontdata,specification)
    readers["hhea"](f,fontdata,specification)
    readers["hmtx"](f,fontdata,specification)
    readers["post"](f,fontdata,specification)
    readers["cff" ](f,fontdata,specification)
    readers["cmap"](f,fontdata,specification)
    readers["loca"](f,fontdata,specification)
    readers["glyf"](f,fontdata,specification)
    readers["kern"](f,fontdata,specification)
    readers["gdef"](f,fontdata,specification)
    readers["gsub"](f,fontdata,specification)
    readers["gpos"](f,fontdata,specification)
    readers["math"](f,fontdata,specification)
    --
    -- drop helper data that was only needed while reading
    fontdata.locations = nil
    fontdata.tables = nil
    fontdata.cidmaps = nil
    fontdata.dictionaries = nil
    --
    -- fontdata.cff = nil
    --
    return fontdata
end
+
-- Open the file and dispatch on the sfnt version: a single font ("OTTO",
-- "true" or sfnt 1.0) or a truetype collection ("ttcf"). For a collection a
-- specific subfont can be requested via specification.subfont; otherwise all
-- subfonts are loaded. Returns the fontdata table or nil on failure.

local function loadfontdata(specification)
    local filename = specification.filename
    local filesize = file.size(filename)
    local f = io.open(filename,"rb")
    if f then
        if filesize > 0 then
            specification.filesize = filesize
            local version  = readstring(f,4)
            local fontdata = nil
            if version == "OTTO" or version == "true" or version == "\0\1\0\0" then
                fontdata = readdata(f,0,specification)
            elseif version == "ttcf" then
                local subfont     = tonumber(specification.subfont)
                local offsets     = { }
                local ttcversion  = readulong(f)
                local nofsubfonts = readulong(f)
                for i=1,nofsubfonts do
                    offsets[i] = readulong(f)
                end
                if subfont then
                    -- subfonts are 1-based; the old 'subfont > 1' test
                    -- wrongly rejected the first subfont
                    if subfont >= 1 and subfont <= nofsubfonts then
                        fontdata = readdata(f,offsets[subfont],specification)
                    else
                        report("no subfont %a in file %a",subfont,filename)
                    end
                else
                    local subfonts = { }
                    fontdata = {
                        filename    = filename,
                        filesize    = filesize,
                        version     = version,
                        subfonts    = subfonts,
                        ttcversion  = ttcversion,
                        nofsubfonts = nofsubfonts,
                    }
                    for i=1,nofsubfonts do
                        subfonts[i] = readdata(f,offsets[i],specification)
                    end
                end
            else
                report("unknown version %a in file %a",version,filename)
            end
            f:close()
            return fontdata
        else
            report("empty file %a",filename)
            f:close()
        end
    else
        report("unable to open %a",filename)
    end
end
+
-- Load a font. The specification is either a filename (then everything is
-- loaded) or a table of flags. Flags imply each other: shapes, lookups or
-- kerns need glyphs; glyphs need details; details need info.

local function loadfont(specification)
    if type(specification) == "string" then
        specification = {
            filename = specification,
            info     = true, -- always true (for now)
            details  = true,
            glyphs   = true,
            shapes   = true,
            kerns    = true,
            lookups  = true,
            -- true or number:
            subfont  = true,
        }
    end
    -- if shapes only then
    if specification.shapes or specification.lookups or specification.kerns then
        specification.glyphs = true
    end
    if specification.glyphs then
        specification.details = true
    end
    if specification.details then
        specification.info = true
    end
    -- a protected call: a corrupt file should not abort the run
    local handler = function(str)
        report("fatal error in file %a: %s",specification.filename,str)
    end
    local okay, fontdata = xpcall(loadfontdata,handler,specification)
    if okay then
        return fontdata
    end
end

readers.loadfont = loadfont
+
-- name table entries can contain garbage so we only accept valid utf8
----- validutf = lpeg.patterns.utf8character^0 * P(-1)
local validutf = lpeg.patterns.validutf8
+
-- Extract identification info (names, weight, width, sizes) from loaded font
-- data; sub is the optional subfont index into a collection. Always returns
-- a table, possibly only carrying a comment when no name data is present.

local function getinfo(maindata,sub)
    local fontdata = sub and maindata.subfonts[sub] or maindata
    local names = fontdata.names
    if names then
        local metrics    = fontdata.windowsmetrics or { }
        local postscript = fontdata.postscript or { }
        local fontheader = fontdata.fontheader or { }
        local cffinfo    = fontdata.cffinfo or { }
        local filename   = fontdata.filename
        --
        -- only accept a name table entry when its content is valid utf8
        local function name(key)
            local value = names[key]
            if value then
                local content = value.content
                return lpegmatch(validutf,content) and content or nil
            end
        end
        --
        local weight = name("weight") or cffinfo.weight or metrics.weight
        local width  = name("width")  or cffinfo.width  or metrics.width
        local info = { -- we inherit some inconsistencies/choices from ff
            subfontindex = sub or 0,
         -- filename     = filename,
         -- version      = name("version"),
            fontname     = name("postscriptname"),
            fullname     = name("fullname"), -- or file.nameonly(filename)
            familyname   = name("typographicfamily") or name("family"),
            subfamily    = name("subfamily"),
            modifiers    = name("typographicsubfamily"),
            weight       = weight and lower(weight),
            width        = width and lower(width),
            pfmweight    = metrics.weightclass or 400, -- will become weightclass
            pfmwidth     = metrics.widthclass or 5, -- will become widthclass
            panosewidth  = metrics.panosewidth,
            panoseweight = metrics.panoseweight,
            italicangle  = postscript.italicangle or 0,
            units        = fontheader.units or 0,
            designsize   = fontdata.designsize,
            minsize      = fontdata.minsize,
            maxsize      = fontdata.maxsize,
            monospaced   = (tonumber(postscript.monospaced or 0) > 0) or metrics.panosewidth == "monospaced",
        }
        return info
    elseif sub then -- was 'n', an undefined global, so this branch never fired
        return {
            filename = fontdata.filename,
            comment  = "there is no info for subfont " .. sub,
        }
    else
        return {
            filename = fontdata.filename,
            comment  = "there is no info",
        }
    end
end
+
+-- we need even less, but we can have a 'detail' variant
+
-- Load only the shape (outline) related data of a font, n being an optional
-- subfont index. A dummy table is returned when loading fails so callers
-- always get something usable.

function readers.loadshapes(filename,n)
    local fontdata = loadfont {
        filename = filename,
        shapes   = true,
        subfont  = n,
    }
    if fontdata then
        return {
            -- version = 0.123 -- todo
            filename = filename,
            glyphs   = fontdata.glyphs,
            units    = fontdata.fontheader.units,
        }
    else
        return {
            filename = filename,
            glyphs   = { },
            units    = 0,
        }
    end
end
+
-- Return identification info for a font file: one info table for a single
-- font, an array of them for a collection (or just the entry for subfont n
-- when n is given), or a table with a comment when something is wrong.

function readers.getinfo(filename,n,details)
    local fontdata = loadfont {
        filename = filename,
        details  = true,
    }
    if fontdata then
        local subfonts = fontdata.subfonts
        if not subfonts then
            return getinfo(fontdata)
        elseif type(n) ~= "number" then
            local info = { }
            for i=1,#subfonts do
                info[i] = getinfo(fontdata,i)
            end
            return info
        -- was 'n > 1 and n <= subfonts': off by one and comparing against
        -- the table instead of its length
        elseif n >= 1 and n <= #subfonts then
            return getinfo(fontdata,n)
        else
            return {
                filename = filename,
                comment  = "there is no subfont " .. n .. " in this file"
            }
        end
    else
        return {
            filename = filename,
            comment  = "the file cannot be opened for reading",
        }
    end
end
+
+--
+
if fonts.hashes then

    local identifiers = fonts.hashes.identifiers
    local loadshapes  = readers.loadshapes

    readers.version = 0.006
    readers.cache   = containers.define("fonts", "shapes", readers.version, true)

    -- todo: loaders per format

    -- Load (and cache) the shape data for a font file; the cache entry is
    -- invalidated when size or modification time changed. Unsupported or
    -- invalid files yield a dummy table so callers always get something.
    local function load(filename,sub)
        local base = file.basename(filename)
        local name = file.removesuffix(base)
        local kind = file.suffix(filename)
        local attr = lfs.attributes(filename)
        local size = attr and attr.size or 0
        local time = attr and attr.modification or 0
        local sub  = tonumber(sub)
        local data -- was an accidental global
        -- 'ttc' (truetype collection) was misspelled 'tcc' before
        if size > 0 and (kind == "otf" or kind == "ttf" or kind == "ttc") then
            local hash = containers.cleanname(base) -- including suffix
            if sub then
                hash = hash .. "-" .. sub
            end
            data = containers.read(readers.cache,hash)
            if not data or data.time ~= time or data.size ~= size then
                data = loadshapes(filename,sub)
                if data then
                    data.size   = size
                    data.format = "opentype"
                    data.time   = time
                    packoutlines(data)
                    containers.write(readers.cache,hash,data)
                    data = containers.read(readers.cache,hash) -- frees old mem
                end
            end
            unpackoutlines(data)
        else
            data = {
                filename = filename,
                size     = 0,
                time     = time,
                format   = "unknown",
                units    = 1000,
                glyphs   = { }
            }
        end
        return data
    end

    -- lazy shape access: fonts.hashes.shapes[id] loads on first use
    fonts.hashes.shapes = table.setmetatableindex(function(t,k)
        local d = identifiers[k]
        local v = load(d.properties.filename,d.subindex)
        t[k] = v
        return v
    end)

end