if not modules then modules = { } end modules ['font-otr'] = { version = 1.001, comment = "companion to font-ini.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", license = "see context related readme files" } -- When looking into a cid font relates issue in the ff library I wondered if -- it made sense to use Lua to filter the information from the otf and ttf -- files. Quite some ff code relates to special fonts and in practice we only -- use rather normal opentype fonts. -- -- The code here is based on the documentation (and examples) at the microsoft -- website. The code will be extended and improved stepwise. After some experiments -- I decided to convert to a format more suitable for the context font handler -- because it makes no sense to rehash all those lookups again. -- -- Currently we can use this code for getting basic info about the font, loading -- shapes and loading the extensive table. I'm not sure if I will provide a ff -- compatible output as well (We're not that far from it as currently I can load -- all data reasonable fast.) -- We can omit redundant glyphs names i.e. ones that match the agl or -- are just a unicode string but it doesn't save that much. It will be an option -- some day. -- Optimizing the widths wil be done anyway as it save quite some on a cjk font -- and the existing (old) code if okay. -- todo: more messages (only if really needed) -- -- considered, in math: -- -- start -> first (so we can skip the first same-size one) -- end -> last -- -- Widths and weights are kind of messy: for instance lmmonolt has a pfmweight of -- 400 while it should be 300. So, for now we mostly stick to the old compromis. -- We don't really need all those language tables so they might be dropped some -- day. -- The new reader is faster on some aspects and slower on other. The memory footprint -- is lower. The string reader is a bit faster than the file reader. The new reader -- gives more efficient tables and has bit more analysis. In practice these times are -- not that relevant because we cache. The otf files take a it more time because we -- need to calculate the boundingboxes. In theory the processing of text should be -- somewhat faster especially for complex fonts with many lookups. -- -- old new str reader -- lmroman12-regular.otf 0.103 0.203 0.195 -- latinmodern-math.otf 0.454 0.768 0.712 -- husayni.ttf 1.142 1.526 1.259 -- -- If there is demand I will consider making a ff compatible table dumper but it's -- probably more fun to provide a way to show features applied. -- I experimented a bit with f:readbyte(n) and f:readshort() and so and it is indeed -- faster but it might not be the real bottleneck as we still need to juggle data. It -- is probably more memory efficient as no intermediate strings are involved. -- if not characters then -- require("char-def") -- require("char-ini") -- end local next, type, unpack = next, type, unpack local byte, lower, char, strip, gsub = string.byte, string.lower, string.char, string.strip, string.gsub local bittest = bit32.btest local concat, remove, unpack, fastcopy = table.concat, table.remov, table.unpack, table.fastcopy local floor, abs, sqrt, round = math.floor, math.abs, math.sqrt, math.round local P, R, S, C, Cs, Cc, Ct, Carg, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Carg, lpeg.Cmt local lpegmatch = lpeg.match local setmetatableindex = table.setmetatableindex local formatters = string.formatters local sortedkeys = table.sortedkeys local sortedhash = table.sortedhash local stripstring = string.strip local utf16_to_utf8_be = utf.utf16_to_utf8_be local report = logs.reporter("otf reader") local trace_cmap = false -- only for checking issues fonts = fonts or { } local handlers = fonts.handlers or { } fonts.handlers = handlers local otf = handlers.otf or { } handlers.otf = otf local readers = otf.readers or { } otf.readers = readers ----- streamreader = utilities.streams -- faster on big files local streamreader = utilities.files -- faster on identify local streamwriter = utilities.files readers.streamreader = streamreader readers.streamwriter = streamwriter local openfile = streamreader.open local closefile = streamreader.close ----- skipbytes = streamreader.skip local setposition = streamreader.setposition local skipshort = streamreader.skipshort local readbytes = streamreader.readbytes local readstring = streamreader.readstring local readbyte = streamreader.readcardinal1 -- 8-bit unsigned integer local readushort = streamreader.readcardinal2 -- 16-bit unsigned integer local readuint = streamreader.readcardinal3 -- 24-bit unsigned integer local readulong = streamreader.readcardinal4 -- 24-bit unsigned integer ----- readchar = streamreader.readinteger1 -- 8-bit signed integer local readshort = streamreader.readinteger2 -- 16-bit signed integer local readlong = streamreader.readinteger4 -- 24-bit unsigned integer local readfixed = streamreader.readfixed4 local readfword = readshort -- 16-bit signed integer that describes a quantity in FUnits local readufword = readushort -- 16-bit unsigned integer that describes a quantity in FUnits local readoffset = readushort local read2dot14 = streamreader.read2dot14 -- 16-bit signed fixed number with the low 14 bits of fraction (2.14) (F2DOT14) function streamreader.readtag(f) return lower(strip(readstring(f,4))) end -- date represented in number of seconds since 12:00 midnight, January 1, 1904. The value is represented as a -- signed 64-bit integer local function readlongdatetime(f) local a, b, c, d, e, f, g, h = readbytes(f,8) return 0x100000000 * d + 0x1000000 * e + 0x10000 * f + 0x100 * g + h end local tableversion = 0.004 readers.tableversion = tableversion local privateoffset = fonts.constructors and fonts.constructors.privateoffset or 0xF0000 -- 0x10FFFF local reportedskipped = { } local function reportskippedtable(tag) if not reportedskipped[tag] then report("loading of table %a skipped (reported once only)",tag) reportedskipped[tag] = true end end -- We have quite some data tables. We are somewhat ff compatible with names but as I used -- the information from the microsoft site there can be differences. Eventually I might end -- up with a different ordering and naming. local reservednames = { [0] = "copyright", "family", "subfamily", "uniqueid", "fullname", "version", "postscriptname", "trademark", "manufacturer", "designer", "description", -- descriptor in ff "vendorurl", "designerurl", "license", "licenseurl", "reserved", "typographicfamily", -- preffamilyname "typographicsubfamily", -- prefmodifiers "compatiblefullname", -- for mac "sampletext", "cidfindfontname", "wwsfamily", "wwssubfamily", "lightbackgroundpalette", "darkbackgroundpalette", } -- more at: https://www.microsoft.com/typography/otspec/name.htm -- setmetatableindex(reservednames,function(t,k) -- local v = "name_" .. k -- t[k] = v -- return v -- end) local platforms = { [0] = "unicode", "macintosh", "iso", "windows", "custom", } local encodings = { -- these stay: unicode = { [0] = "unicode 1.0 semantics", "unicode 1.1 semantics", "iso/iec 10646", "unicode 2.0 bmp", -- cmap subtable formats 0, 4, 6 "unicode 2.0 full", -- cmap subtable formats 0, 4, 6, 10, 12 "unicode variation sequences", -- cmap subtable format 14). "unicode full repertoire", -- cmap subtable formats 0, 4, 6, 10, 12, 13 }, -- these can go: macintosh = { [0] = "roman", "japanese", "chinese (traditional)", "korean", "arabic", "hebrew", "greek", "russian", "rsymbol", "devanagari", "gurmukhi", "gujarati", "oriya", "bengali", "tamil", "telugu", "kannada", "malayalam", "sinhalese", "burmese", "khmer", "thai", "laotian", "georgian", "armenian", "chinese (simplified)", "tibetan", "mongolian", "geez", "slavic", "vietnamese", "sindhi", "uninterpreted", }, -- these stay: iso = { [0] = "7-bit ascii", "iso 10646", "iso 8859-1", }, -- these stay: windows = { [0] = "symbol", "unicode bmp", -- this is utf16 "shiftjis", "prc", "big5", "wansung", "johab", "reserved 7", "reserved 8", "reserved 9", "unicode ucs-4", }, custom = { --custom: 0-255 : otf windows nt compatibility mapping } } local decoders = { unicode = { }, macintosh = { }, iso = { }, windows = { -- maybe always utf16 ["unicode semantics"] = utf16_to_utf8_be, ["unicode bmp"] = utf16_to_utf8_be, ["unicode full"] = utf16_to_utf8_be, ["unicode 1.0 semantics"] = utf16_to_utf8_be, ["unicode 1.1 semantics"] = utf16_to_utf8_be, ["unicode 2.0 bmp"] = utf16_to_utf8_be, ["unicode 2.0 full"] = utf16_to_utf8_be, ["unicode variation sequences"] = utf16_to_utf8_be, ["unicode full repertoire"] = utf16_to_utf8_be, }, custom = { }, } -- This is bit over the top as we can just look for either windows, unicode or macintosh -- names (in that order). A font with no english name is probably a weird one anyway. local languages = { -- these stay: unicode = { [ 0] = "english", }, -- english can stay: macintosh = { [ 0] = "english", -- [ 1] = "french", -- [ 2] = "german", -- [ 3] = "italian", -- [ 4] = "dutch", -- [ 5] = "swedish", -- [ 6] = "spanish", -- [ 7] = "danish", -- [ 8] = "portuguese", -- [ 9] = "norwegian", -- [ 10] = "hebrew", -- [ 11] = "japanese", -- [ 12] = "arabic", -- [ 13] = "finnish", -- [ 14] = "greek", -- [ 15] = "icelandic", -- [ 16] = "maltese", -- [ 17] = "turkish", -- [ 18] = "croatian", -- [ 19] = "chinese (traditional)", -- [ 20] = "urdu", -- [ 21] = "hindi", -- [ 22] = "thai", -- [ 23] = "korean", -- [ 24] = "lithuanian", -- [ 25] = "polish", -- [ 26] = "hungarian", -- [ 27] = "estonian", -- [ 28] = "latvian", -- [ 29] = "sami", -- [ 30] = "faroese", -- [ 31] = "farsi/persian", -- [ 32] = "russian", -- [ 33] = "chinese (simplified)", -- [ 34] = "flemish", -- [ 35] = "irish gaelic", -- [ 36] = "albanian", -- [ 37] = "romanian", -- [ 38] = "czech", -- [ 39] = "slovak", -- [ 40] = "slovenian", -- [ 41] = "yiddish", -- [ 42] = "serbian", -- [ 43] = "macedonian", -- [ 44] = "bulgarian", -- [ 45] = "ukrainian", -- [ 46] = "byelorussian", -- [ 47] = "uzbek", -- [ 48] = "kazakh", -- [ 49] = "azerbaijani (cyrillic script)", -- [ 50] = "azerbaijani (arabic script)", -- [ 51] = "armenian", -- [ 52] = "georgian", -- [ 53] = "moldavian", -- [ 54] = "kirghiz", -- [ 55] = "tajiki", -- [ 56] = "turkmen", -- [ 57] = "mongolian (mongolian script)", -- [ 58] = "mongolian (cyrillic script)", -- [ 59] = "pashto", -- [ 60] = "kurdish", -- [ 61] = "kashmiri", -- [ 62] = "sindhi", -- [ 63] = "tibetan", -- [ 64] = "nepali", -- [ 65] = "sanskrit", -- [ 66] = "marathi", -- [ 67] = "bengali", -- [ 68] = "assamese", -- [ 69] = "gujarati", -- [ 70] = "punjabi", -- [ 71] = "oriya", -- [ 72] = "malayalam", -- [ 73] = "kannada", -- [ 74] = "tamil", -- [ 75] = "telugu", -- [ 76] = "sinhalese", -- [ 77] = "burmese", -- [ 78] = "khmer", -- [ 79] = "lao", -- [ 80] = "vietnamese", -- [ 81] = "indonesian", -- [ 82] = "tagalong", -- [ 83] = "malay (roman script)", -- [ 84] = "malay (arabic script)", -- [ 85] = "amharic", -- [ 86] = "tigrinya", -- [ 87] = "galla", -- [ 88] = "somali", -- [ 89] = "swahili", -- [ 90] = "kinyarwanda/ruanda", -- [ 91] = "rundi", -- [ 92] = "nyanja/chewa", -- [ 93] = "malagasy", -- [ 94] = "esperanto", -- [128] = "welsh", -- [129] = "basque", -- [130] = "catalan", -- [131] = "latin", -- [132] = "quenchua", -- [133] = "guarani", -- [134] = "aymara", -- [135] = "tatar", -- [136] = "uighur", -- [137] = "dzongkha", -- [138] = "javanese (roman script)", -- [139] = "sundanese (roman script)", -- [140] = "galician", -- [141] = "afrikaans", -- [142] = "breton", -- [143] = "inuktitut", -- [144] = "scottish gaelic", -- [145] = "manx gaelic", -- [146] = "irish gaelic (with dot above)", -- [147] = "tongan", -- [148] = "greek (polytonic)", -- [149] = "greenlandic", -- [150] = "azerbaijani (roman script)", }, -- these can stay: iso = { }, -- english can stay: windows = { -- [0x0436] = "afrikaans - south africa", -- [0x041c] = "albanian - albania", -- [0x0484] = "alsatian - france", -- [0x045e] = "amharic - ethiopia", -- [0x1401] = "arabic - algeria", -- [0x3c01] = "arabic - bahrain", -- [0x0c01] = "arabic - egypt", -- [0x0801] = "arabic - iraq", -- [0x2c01] = "arabic - jordan", -- [0x3401] = "arabic - kuwait", -- [0x3001] = "arabic - lebanon", -- [0x1001] = "arabic - libya", -- [0x1801] = "arabic - morocco", -- [0x2001] = "arabic - oman", -- [0x4001] = "arabic - qatar", -- [0x0401] = "arabic - saudi arabia", -- [0x2801] = "arabic - syria", -- [0x1c01] = "arabic - tunisia", -- [0x3801] = "arabic - u.a.e.", -- [0x2401] = "arabic - yemen", -- [0x042b] = "armenian - armenia", -- [0x044d] = "assamese - india", -- [0x082c] = "azeri (cyrillic) - azerbaijan", -- [0x042c] = "azeri (latin) - azerbaijan", -- [0x046d] = "bashkir - russia", -- [0x042d] = "basque - basque", -- [0x0423] = "belarusian - belarus", -- [0x0845] = "bengali - bangladesh", -- [0x0445] = "bengali - india", -- [0x201a] = "bosnian (cyrillic) - bosnia and herzegovina", -- [0x141a] = "bosnian (latin) - bosnia and herzegovina", -- [0x047e] = "breton - france", -- [0x0402] = "bulgarian - bulgaria", -- [0x0403] = "catalan - catalan", -- [0x0c04] = "chinese - hong kong s.a.r.", -- [0x1404] = "chinese - macao s.a.r.", -- [0x0804] = "chinese - people's republic of china", -- [0x1004] = "chinese - singapore", -- [0x0404] = "chinese - taiwan", -- [0x0483] = "corsican - france", -- [0x041a] = "croatian - croatia", -- [0x101a] = "croatian (latin) - bosnia and herzegovina", -- [0x0405] = "czech - czech republic", -- [0x0406] = "danish - denmark", -- [0x048c] = "dari - afghanistan", -- [0x0465] = "divehi - maldives", -- [0x0813] = "dutch - belgium", -- [0x0413] = "dutch - netherlands", -- [0x0c09] = "english - australia", -- [0x2809] = "english - belize", -- [0x1009] = "english - canada", -- [0x2409] = "english - caribbean", -- [0x4009] = "english - india", -- [0x1809] = "english - ireland", -- [0x2009] = "english - jamaica", -- [0x4409] = "english - malaysia", -- [0x1409] = "english - new zealand", -- [0x3409] = "english - republic of the philippines", -- [0x4809] = "english - singapore", -- [0x1c09] = "english - south africa", -- [0x2c09] = "english - trinidad and tobago", -- [0x0809] = "english - united kingdom", [0x0409] = "english - united states", -- [0x3009] = "english - zimbabwe", -- [0x0425] = "estonian - estonia", -- [0x0438] = "faroese - faroe islands", -- [0x0464] = "filipino - philippines", -- [0x040b] = "finnish - finland", -- [0x080c] = "french - belgium", -- [0x0c0c] = "french - canada", -- [0x040c] = "french - france", -- [0x140c] = "french - luxembourg", -- [0x180c] = "french - principality of monoco", -- [0x100c] = "french - switzerland", -- [0x0462] = "frisian - netherlands", -- [0x0456] = "galician - galician", -- [0x0437] = "georgian -georgia", -- [0x0c07] = "german - austria", -- [0x0407] = "german - germany", -- [0x1407] = "german - liechtenstein", -- [0x1007] = "german - luxembourg", -- [0x0807] = "german - switzerland", -- [0x0408] = "greek - greece", -- [0x046f] = "greenlandic - greenland", -- [0x0447] = "gujarati - india", -- [0x0468] = "hausa (latin) - nigeria", -- [0x040d] = "hebrew - israel", -- [0x0439] = "hindi - india", -- [0x040e] = "hungarian - hungary", -- [0x040f] = "icelandic - iceland", -- [0x0470] = "igbo - nigeria", -- [0x0421] = "indonesian - indonesia", -- [0x045d] = "inuktitut - canada", -- [0x085d] = "inuktitut (latin) - canada", -- [0x083c] = "irish - ireland", -- [0x0434] = "isixhosa - south africa", -- [0x0435] = "isizulu - south africa", -- [0x0410] = "italian - italy", -- [0x0810] = "italian - switzerland", -- [0x0411] = "japanese - japan", -- [0x044b] = "kannada - india", -- [0x043f] = "kazakh - kazakhstan", -- [0x0453] = "khmer - cambodia", -- [0x0486] = "k'iche - guatemala", -- [0x0487] = "kinyarwanda - rwanda", -- [0x0441] = "kiswahili - kenya", -- [0x0457] = "konkani - india", -- [0x0412] = "korean - korea", -- [0x0440] = "kyrgyz - kyrgyzstan", -- [0x0454] = "lao - lao p.d.r.", -- [0x0426] = "latvian - latvia", -- [0x0427] = "lithuanian - lithuania", -- [0x082e] = "lower sorbian - germany", -- [0x046e] = "luxembourgish - luxembourg", -- [0x042f] = "macedonian (fyrom) - former yugoslav republic of macedonia", -- [0x083e] = "malay - brunei darussalam", -- [0x043e] = "malay - malaysia", -- [0x044c] = "malayalam - india", -- [0x043a] = "maltese - malta", -- [0x0481] = "maori - new zealand", -- [0x047a] = "mapudungun - chile", -- [0x044e] = "marathi - india", -- [0x047c] = "mohawk - mohawk", -- [0x0450] = "mongolian (cyrillic) - mongolia", -- [0x0850] = "mongolian (traditional) - people's republic of china", -- [0x0461] = "nepali - nepal", -- [0x0414] = "norwegian (bokmal) - norway", -- [0x0814] = "norwegian (nynorsk) - norway", -- [0x0482] = "occitan - france", -- [0x0448] = "odia (formerly oriya) - india", -- [0x0463] = "pashto - afghanistan", -- [0x0415] = "polish - poland", -- [0x0416] = "portuguese - brazil", -- [0x0816] = "portuguese - portugal", -- [0x0446] = "punjabi - india", -- [0x046b] = "quechua - bolivia", -- [0x086b] = "quechua - ecuador", -- [0x0c6b] = "quechua - peru", -- [0x0418] = "romanian - romania", -- [0x0417] = "romansh - switzerland", -- [0x0419] = "russian - russia", -- [0x243b] = "sami (inari) - finland", -- [0x103b] = "sami (lule) - norway", -- [0x143b] = "sami (lule) - sweden", -- [0x0c3b] = "sami (northern) - finland", -- [0x043b] = "sami (northern) - norway", -- [0x083b] = "sami (northern) - sweden", -- [0x203b] = "sami (skolt) - finland", -- [0x183b] = "sami (southern) - norway", -- [0x1c3b] = "sami (southern) - sweden", -- [0x044f] = "sanskrit - india", -- [0x1c1a] = "serbian (cyrillic) - bosnia and herzegovina", -- [0x0c1a] = "serbian (cyrillic) - serbia", -- [0x181a] = "serbian (latin) - bosnia and herzegovina", -- [0x081a] = "serbian (latin) - serbia", -- [0x046c] = "sesotho sa leboa - south africa", -- [0x0432] = "setswana - south africa", -- [0x045b] = "sinhala - sri lanka", -- [0x041b] = "slovak - slovakia", -- [0x0424] = "slovenian - slovenia", -- [0x2c0a] = "spanish - argentina", -- [0x400a] = "spanish - bolivia", -- [0x340a] = "spanish - chile", -- [0x240a] = "spanish - colombia", -- [0x140a] = "spanish - costa rica", -- [0x1c0a] = "spanish - dominican republic", -- [0x300a] = "spanish - ecuador", -- [0x440a] = "spanish - el salvador", -- [0x100a] = "spanish - guatemala", -- [0x480a] = "spanish - honduras", -- [0x080a] = "spanish - mexico", -- [0x4c0a] = "spanish - nicaragua", -- [0x180a] = "spanish - panama", -- [0x3c0a] = "spanish - paraguay", -- [0x280a] = "spanish - peru", -- [0x500a] = "spanish - puerto rico", -- [0x0c0a] = "spanish (modern sort) - spain", -- [0x040a] = "spanish (traditional sort) - spain", -- [0x540a] = "spanish - united states", -- [0x380a] = "spanish - uruguay", -- [0x200a] = "spanish - venezuela", -- [0x081d] = "sweden - finland", -- [0x041d] = "swedish - sweden", -- [0x045a] = "syriac - syria", -- [0x0428] = "tajik (cyrillic) - tajikistan", -- [0x085f] = "tamazight (latin) - algeria", -- [0x0449] = "tamil - india", -- [0x0444] = "tatar - russia", -- [0x044a] = "telugu - india", -- [0x041e] = "thai - thailand", -- [0x0451] = "tibetan - prc", -- [0x041f] = "turkish - turkey", -- [0x0442] = "turkmen - turkmenistan", -- [0x0480] = "uighur - prc", -- [0x0422] = "ukrainian - ukraine", -- [0x042e] = "upper sorbian - germany", -- [0x0420] = "urdu - islamic republic of pakistan", -- [0x0843] = "uzbek (cyrillic) - uzbekistan", -- [0x0443] = "uzbek (latin) - uzbekistan", -- [0x042a] = "vietnamese - vietnam", -- [0x0452] = "welsh - united kingdom", -- [0x0488] = "wolof - senegal", -- [0x0485] = "yakut - russia", -- [0x0478] = "yi - prc", -- [0x046a] = "yoruba - nigeria", }, custom = { }, } local standardromanencoding = { [0] = -- taken from wikipedia "notdef", ".null", "nonmarkingreturn", "space", "exclam", "quotedbl", "numbersign", "dollar", "percent", "ampersand", "quotesingle", "parenleft", "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "colon", "semicolon", "less", "equal", "greater", "question", "at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft", "backslash", "bracketright", "asciicircum", "underscore", "grave", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde", "Adieresis", "Aring", "Ccedilla", "Eacute", "Ntilde", "Odieresis", "Udieresis", "aacute", "agrave", "acircumflex", "adieresis", "atilde", "aring", "ccedilla", "eacute", "egrave", "ecircumflex", "edieresis", "iacute", "igrave", "icircumflex", "idieresis", "ntilde", "oacute", "ograve", "ocircumflex", "odieresis", "otilde", "uacute", "ugrave", "ucircumflex", "udieresis", "dagger", "degree", "cent", "sterling", "section", "bullet", "paragraph", "germandbls", "registered", "copyright", "trademark", "acute", "dieresis", "notequal", "AE", "Oslash", "infinity", "plusminus", "lessequal", "greaterequal", "yen", "mu", "partialdiff", "summation", "product", "pi", "integral", "ordfeminine", "ordmasculine", "Omega", "ae", "oslash", "questiondown", "exclamdown", "logicalnot", "radical", "florin", "approxequal", "Delta", "guillemotleft", "guillemotright", "ellipsis", "nonbreakingspace", "Agrave", "Atilde", "Otilde", "OE", "oe", "endash", "emdash", "quotedblleft", "quotedblright", "quoteleft", "quoteright", "divide", "lozenge", "ydieresis", "Ydieresis", "fraction", "currency", "guilsinglleft", "guilsinglright", "fi", "fl", "daggerdbl", "periodcentered", "quotesinglbase", "quotedblbase", "perthousand", "Acircumflex", "Ecircumflex", "Aacute", "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave", "Oacute", "Ocircumflex", "apple", "Ograve", "Uacute", "Ucircumflex", "Ugrave", "dotlessi", "circumflex", "tilde", "macron", "breve", "dotaccent", "ring", "cedilla", "hungarumlaut", "ogonek", "caron", "Lslash", "lslash", "Scaron", "scaron", "Zcaron", "zcaron", "brokenbar", "Eth", "eth", "Yacute", "yacute", "Thorn", "thorn", "minus", "multiply", "onesuperior", "twosuperior", "threesuperior", "onehalf", "onequarter", "threequarters", "franc", "Gbreve", "gbreve", "Idotaccent", "Scedilla", "scedilla", "Cacute", "cacute", "Ccaron", "ccaron", "dcroat", } local weights = { [100] = "thin", [200] = "extralight", [300] = "light", [400] = "normal", [500] = "medium", [600] = "semibold", -- demi demibold [700] = "bold", [800] = "extrabold", [900] = "black", } local widths = { [1] = "ultracondensed", [2] = "extracondensed", [3] = "condensed", [4] = "semicondensed", [5] = "normal", [6] = "semiexpanded", [7] = "expanded", [8] = "extraexpanded", [9] = "ultraexpanded", } setmetatableindex(weights, function(t,k) local r = floor((k + 50) / 100) * 100 local v = (r > 900 and "black") or rawget(t,r) or "normal" -- print("weight:",k,r,v) return v end) setmetatableindex(widths,function(t,k) -- print("width:",k) return "normal" end) local panoseweights = { [ 0] = "normal", [ 1] = "normal", [ 2] = "verylight", [ 3] = "light", [ 4] = "thin", [ 5] = "book", [ 6] = "medium", [ 7] = "demi", [ 8] = "bold", [ 9] = "heavy", [10] = "black", } local panosewidths = { [ 0] = "normal", [ 1] = "normal", [ 2] = "normal", [ 3] = "normal", [ 4] = "normal", [ 5] = "expanded", [ 6] = "condensed", [ 7] = "veryexpanded", [ 8] = "verycondensed", [ 9] = "monospaced", } -- We implement a reader per table. -- The name table is probably the first one to load. After all this one provides -- useful information about what we deal with. The complication is that we need -- to filter the best one available. local platformnames = { postscriptname = true, fullname = true, family = true, subfamily = true, typographicfamily = true, typographicsubfamily = true, compatiblefullname = true, } function readers.name(f,fontdata,specification) local datatable = fontdata.tables.name if datatable then setposition(f,datatable.offset) local format = readushort(f) local nofnames = readushort(f) local offset = readushort(f) -- we can also provide a raw list as extra, todo as option local start = datatable.offset + offset local namelists = { unicode = { }, windows = { }, macintosh = { }, -- iso = { }, -- windows = { }, } for i=1,nofnames do local platform = platforms[readushort(f)] if platform then local namelist = namelists[platform] if namelist then local encoding = readushort(f) local language = readushort(f) local encodings = encodings[platform] local languages = languages[platform] if encodings and languages then local encoding = encodings[encoding] local language = languages[language] if encoding and language then local name = reservednames[readushort(f)] if name then namelist[#namelist+1] = { platform = platform, encoding = encoding, language = language, name = name, length = readushort(f), offset = start + readushort(f), } else skipshort(f,2) end else skipshort(f,3) end else skipshort(f,3) end else skipshort(f,5) end else skipshort(f,5) end end -- if format == 1 then -- local noftags = readushort(f) -- for i=1,noftags do -- local length = readushort(f) -- local offset = readushort(f) -- end -- end -- -- we need to choose one we like, for instance an unicode one -- local names = { } local done = { } -- -- there is quite some logic in ff ... hard to follow so we start simple -- and extend when we run into it (todo: proper reverse hash) .. we're only -- interested in english anyway -- local function filter(platform,e,l) local namelist = namelists[platform] for i=1,#namelist do local name = namelist[i] local nametag = name.name if not done[nametag] then local encoding = name.encoding local language = name.language if (not e or encoding == e) and (not l or language == l) then setposition(f,name.offset) local content = readstring(f,name.length) local decoder = decoders[platform] if decoder then decoder = decoder[encoding] end if decoder then content = decoder(content) end names[nametag] = { content = content, platform = platform, encoding = encoding, language = language, } done[nametag] = true end end end end -- filter("windows","unicode bmp","english - united states") -- filter("unicode") -- which one ? filter("macintosh","roman","english") filter("windows") filter("macintosh") filter("unicode") -- fontdata.names = names -- if specification.platformnames then local collected = { } for platform, namelist in next, namelists do local filtered = false for i=1,#namelist do local entry = namelist[i] local name = entry.name if platformnames[name] then setposition(f,entry.offset) local content = readstring(f,entry.length) local encoding = entry.encoding local decoder = decoders[platform] if decoder then decoder = decoder[encoding] end if decoder then content = decoder(content) end if filtered then filtered[name] = content else filtered = { [name] = content } end end end if filtered then collected[platform] = filtered end end fontdata.platformnames = collected end else fontdata.names = { } end end ----- validutf = lpeg.patterns.utf8character^0 * P(-1) local validutf = lpeg.patterns.validutf8 local function getname(fontdata,key) local names = fontdata.names if names then local value = names[key] if value then local content = value.content return lpegmatch(validutf,content) and content or nil end end end -- This table is an original windows (with its precursor os/2) table. In ff this one is -- part of the pfminfo table but here we keep it separate (for now). We will create a -- properties table afterwards. readers["os/2"] = function(f,fontdata) local datatable = fontdata.tables["os/2"] if datatable then setposition(f,datatable.offset) local version = readushort(f) local windowsmetrics = { version = version, averagewidth = readshort(f), weightclass = readushort(f), widthclass = readushort(f), fstype = readushort(f), subscriptxsize = readshort(f), subscriptysize = readshort(f), subscriptxoffset = readshort(f), subscriptyoffset = readshort(f), superscriptxsize = readshort(f), superscriptysize = readshort(f), superscriptxoffset = readshort(f), superscriptyoffset = readshort(f), strikeoutsize = readshort(f), strikeoutpos = readshort(f), familyclass = readshort(f), panose = { readbytes(f,10) }, unicoderanges = { readulong(f), readulong(f), readulong(f), readulong(f) }, vendor = readstring(f,4), fsselection = readushort(f), firstcharindex = readushort(f), lastcharindex = readushort(f), typoascender = readshort(f), typodescender = readshort(f), typolinegap = readshort(f), winascent = readushort(f), windescent = readushort(f), } if version >= 1 then windowsmetrics.codepageranges = { readulong(f), readulong(f) } end if version >= 3 then windowsmetrics.xheight = readshort(f) windowsmetrics.capheight = readshort(f) windowsmetrics.defaultchar = readushort(f) windowsmetrics.breakchar = readushort(f) -- windowsmetrics.maxcontexts = readushort(f) -- windowsmetrics.loweropticalpointsize = readushort(f) -- windowsmetrics.upperopticalpointsize = readushort(f) end -- -- todo: unicoderanges -- windowsmetrics.weight = windowsmetrics.weightclass and weights[windowsmetrics.weightclass] windowsmetrics.width = windowsmetrics.widthclass and widths [windowsmetrics.widthclass] -- windowsmetrics.panoseweight = panoseweights[windowsmetrics.panose[3]] windowsmetrics.panosewidth = panosewidths [windowsmetrics.panose[4]] -- fontdata.windowsmetrics = windowsmetrics else fontdata.windowsmetrics = { } end end readers.head = function(f,fontdata) local datatable = fontdata.tables.head if datatable then setposition(f,datatable.offset) local fontheader = { version = readfixed(f), revision = readfixed(f), checksum = readulong(f), magic = readulong(f), flags = readushort(f), units = readushort(f), created = readlongdatetime(f), modified = readlongdatetime(f), xmin = readshort(f), ymin = readshort(f), xmax = readshort(f), ymax = readshort(f), macstyle = readushort(f), smallpixels = readushort(f), directionhint = readshort(f), indextolocformat = readshort(f), glyphformat = readshort(f), } fontdata.fontheader = fontheader else fontdata.fontheader = { } end fontdata.nofglyphs = 0 end -- This table is a rather simple one. No treatment of values is needed here. Most -- variables are not used but nofmetrics is quite important. readers.hhea = function(f,fontdata,specification) if specification.details then local datatable = fontdata.tables.hhea if datatable then setposition(f,datatable.offset) fontdata.horizontalheader = { version = readfixed(f), -- two ushorts: major minor ascender = readfword(f), descender = readfword(f), linegap = readfword(f), maxadvancewidth = readufword(f), minleftsidebearing = readfword(f), minrightsidebearing = readfword(f), maxextent = readfword(f), caretsloperise = readshort(f), caretsloperun = readshort(f), caretoffset = readshort(f), reserved_1 = readshort(f), reserved_2 = readshort(f), reserved_3 = readshort(f), reserved_4 = readshort(f), metricdataformat = readshort(f), nofmetrics = readushort(f), } else fontdata.horizontalheader = { nofmetrics = 0, } end end end readers.vhea = function(f,fontdata,specification) if specification.details then local datatable = fontdata.tables.vhea if datatable then setposition(f,datatable.offset) local version = readfixed(f) fontdata.verticalheader = { version = version, ascender = readfword(f), descender = readfword(f), linegap = readfword(f), maxadvanceheight = readufword(f), mintopsidebearing = readfword(f), minbottomsidebearing = readfword(f), maxextent = readfword(f), caretsloperise = readshort(f), caretsloperun = readshort(f), caretoffset = readshort(f), reserved_1 = readshort(f), reserved_2 = readshort(f), reserved_3 = readshort(f), reserved_4 = readshort(f), metricdataformat = readshort(f), nofmetrics = readushort(f), } -- inspect(fontdata.verticalheader) else fontdata.verticalheader = { nofmetrics = 0, } end end end -- We probably never need all these variables, but we do need the nofglyphs when loading other -- tables. Again we use the microsoft names but see no reason to have "max" in each name. -- fontdata.maximumprofile can be bad readers.maxp = function(f,fontdata,specification) if specification.details then local datatable = fontdata.tables.maxp if datatable then setposition(f,datatable.offset) local version = readfixed(f) local nofglyphs = readushort(f) fontdata.nofglyphs = nofglyphs if version == 0.5 then fontdata.maximumprofile = { version = version, nofglyphs = nofglyphs, } return elseif version == 1.0 then fontdata.maximumprofile = { version = version, nofglyphs = nofglyphs, points = readushort(f), contours = readushort(f), compositepoints = readushort(f), compositecontours = readushort(f), zones = readushort(f), twilightpoints = readushort(f), storage = readushort(f), functiondefs = readushort(f), instructiondefs = readushort(f), stackelements = readushort(f), sizeofinstructions = readushort(f), componentelements = readushort(f), componentdepth = readushort(f), } return end end fontdata.maximumprofile = { version = version, nofglyphs = 0, } end end -- Here we filter the (advance) widths (that can be different from the boundingbox width of -- course). readers.hmtx = function(f,fontdata,specification) if specification.glyphs then local datatable = fontdata.tables.hmtx if datatable then setposition(f,datatable.offset) local horizontalheader = fontdata.horizontalheader local nofmetrics = horizontalheader.nofmetrics local glyphs = fontdata.glyphs local nofglyphs = fontdata.nofglyphs local width = 0 -- advance local leftsidebearing = 0 for i=0,nofmetrics-1 do local glyph = glyphs[i] width = readshort(f) leftsidebearing = readshort(f) if width ~= 0 then glyph.width = width end -- if leftsidebearing ~= 0 then -- glyph.lsb = leftsidebearing -- end end -- The next can happen in for instance a monospace font or in a cjk font -- with fixed widths. for i=nofmetrics,nofglyphs-1 do local glyph = glyphs[i] if width ~= 0 then glyph.width = width end -- if leftsidebearing ~= 0 then -- glyph.lsb = leftsidebearing -- end end end end end readers.vmtx = function(f,fontdata,specification) if specification.glyphs then local datatable = fontdata.tables.vmtx if datatable then setposition(f,datatable.offset) local verticalheader = fontdata.verticalheader local nofmetrics = verticalheader.nofmetrics local glyphs = fontdata.glyphs local nofglyphs = fontdata.nofglyphs local vheight = 0 local vdefault = verticalheader.ascender + verticalheader.descender local topsidebearing = 0 for i=0,nofmetrics-1 do local glyph = glyphs[i] vheight = readshort(f) topsidebearing = readshort(f) if vheight ~= 0 and vheight ~= vdefault then glyph.vheight = vheight end -- if topsidebearing ~= 0 then -- glyph.tsb = topsidebearing -- end end -- The next can happen in for instance a monospace font or in a cjk font -- with fixed heights. for i=nofmetrics,nofglyphs-1 do local glyph = glyphs[i] if vheight ~= 0 and vheight ~= vdefault then glyph.vheight = vheight end -- if topsidebearing ~= 0 then -- glyph.tsb = topsidebearing -- end end end end end readers.vorg = function(f,fontdata,specification) if specification.glyphs then local datatable = fontdata.tables.vorg if datatable then report("todo: %s","vorg") end end end -- The post table relates to postscript (printing) but has some relevant properties for other -- usage as well. We just use the names from the microsoft specification. The version 2.0 -- description is somewhat fuzzy but it is a hybrid with overloads. readers.post = function(f,fontdata,specification) local datatable = fontdata.tables.post if datatable then setposition(f,datatable.offset) local version = readfixed(f) fontdata.postscript = { version = version, italicangle = round(1000*readfixed(f))/1000, underlineposition = readfword(f), underlinethickness = readfword(f), monospaced = readulong(f), minmemtype42 = readulong(f), maxmemtype42 = readulong(f), minmemtype1 = readulong(f), maxmemtype1 = readulong(f), } if not specification.glyphs then -- enough done elseif version == 1.0 then -- mac encoding (258 glyphs) for index=0,#standardromanencoding do glyphs[index].name = standardromanencoding[index] end elseif version == 2.0 then local glyphs = fontdata.glyphs local nofglyphs = readushort(f) local indices = { } local names = { } local maxnames = 0 for i=0,nofglyphs-1 do local nameindex = readushort(f) if nameindex >= 258 then maxnames = maxnames + 1 nameindex = nameindex - 257 indices[nameindex] = i else glyphs[i].name = standardromanencoding[nameindex] end end for i=1,maxnames do local mapping = indices[i] if not mapping then report("quit post name fetching at %a of %a: %s",i,maxnames,"no index") break else local length = readbyte(f) if length > 0 then glyphs[mapping].name = readstring(f,length) else report("quit post name fetching at %a of %a: %s",i,maxnames,"overflow") break end end end elseif version == 2.5 then -- depricated, will be done when needed elseif version == 3.0 then -- no ps name information end else fontdata.postscript = { } end end readers.cff = function(f,fontdata,specification) if specification.glyphs then reportskippedtable("cff") end end -- Not all cmaps make sense .. e.g. dfont is obsolete and probably more are not relevant. Let's see -- what we run into. There is some weird calculation going on here because we offset in a table -- being a blob of memory or file. Anyway, I can't stand lunatic formats like this esp when there -- is no real gain. local formatreaders = { } local duplicatestoo = true local sequence = { { 3, 1, 4 }, { 3, 10, 12 }, { 0, 3, 4 }, { 0, 1, 4 }, -- { 0, 4, 12 }, { 0, 0, 6 }, { 3, 0, 6 }, -- variants { 0, 5, 14 }, -- last resort ranges { 3, 10, 13 }, } -- local sequence = { -- { 0, 1, 4 }, -- { 0, 4, 12 }, -- { 0, 3, 4 }, -- { 3, 1, 4 }, -- { 3, 10, 12 }, -- { 0, 0, 6 }, -- { 3, 0, 6 }, -- -- variants -- { 0, 5, 14 }, -- } local supported = { } for i=1,#sequence do local sp, se, sf = unpack(sequence[i]) local p = supported[sp] if not p then p = { } supported[sp] = p end local e = p[se] if not e then e = { } p[se] = e end e[sf] = true end formatreaders[4] = function(f,fontdata,offset) setposition(f,offset+2) -- skip format -- local length = readushort(f) -- in bytes of subtable local language = readushort(f) local nofsegments = readushort(f) / 2 -- skipshort(f,3) -- searchrange entryselector rangeshift -- local endchars = { } local startchars = { } local deltas = { } local offsets = { } local indices = { } local mapping = fontdata.mapping local glyphs = fontdata.glyphs local duplicates = fontdata.duplicates local nofdone = 0 -- for i=1,nofsegments do endchars[i] = readushort(f) end local reserved = readushort(f) -- 0 for i=1,nofsegments do startchars[i] = readushort(f) end for i=1,nofsegments do deltas[i] = readshort(f) end for i=1,nofsegments do offsets[i] = readushort(f) end -- format length language nofsegments searchrange entryselector rangeshift 4-tables local size = (length - 2 * 2 - 5 * 2 - 4 * nofsegments * 2) / 2 for i=1,size-1 do indices[i] = readushort(f) end -- for segment=1,nofsegments do local startchar = startchars[segment] local endchar = endchars[segment] local offset = offsets[segment] local delta = deltas[segment] if startchar == 0xFFFF and endchar == 0xFFFF then -- break elseif startchar == 0xFFFF and offset == 0 then -- break elseif offset == 0xFFFF then -- bad encoding elseif offset == 0 then if trace_cmap then report("format 4.%i segment %2i from %C upto %C at index %H",1,segment,startchar,endchar,(startchar + delta) % 65536) end for unicode=startchar,endchar do local index = (unicode + delta) % 65536 if index and index > 0 then local glyph = glyphs[index] if glyph then local gu = glyph.unicode if not gu then glyph.unicode = unicode nofdone = nofdone + 1 elseif gu ~= unicode then if duplicatestoo then local d = duplicates[gu] if d then d[unicode] = true else duplicates[gu] = { [unicode] = true } end else -- no duplicates ... weird side effects in lm report("duplicate case 1: %C %04i %s",unicode,index,glyphs[index].name) end end if not mapping[index] then mapping[index] = unicode end end end end else local shift = (segment-nofsegments+offset/2) - startchar if trace_cmap then report("format 4.%i segment %2i from %C upto %C at index %H",0,segment,startchar,endchar,(startchar + delta) % 65536) end for unicode=startchar,endchar do local slot = shift + unicode local index = indices[slot] if index and index > 0 then index = (index + delta) % 65536 local glyph = glyphs[index] if glyph then local gu = glyph.unicode if not gu then glyph.unicode = unicode nofdone = nofdone + 1 elseif gu ~= unicode then if duplicatestoo then local d = duplicates[gu] if d then d[unicode] = true else duplicates[gu] = { [unicode] = true } end else -- no duplicates ... weird side effects in lm report("duplicate case 2: %C %04i %s",unicode,index,glyphs[index].name) end end if not mapping[index] then mapping[index] = unicode end end end end end end return nofdone end formatreaders[6] = function(f,fontdata,offset) setposition(f,offset) -- + 2 + 2 + 2 -- skip format length language local format = readushort(f) local length = readushort(f) local language = readushort(f) local mapping = fontdata.mapping local glyphs = fontdata.glyphs local duplicates = fontdata.duplicates local start = readushort(f) local count = readushort(f) local stop = start+count-1 local nofdone = 0 if trace_cmap then report("format 6 from %C to %C",2,start,stop) end for unicode=start,stop do local index = readushort(f) if index > 0 then local glyph = glyphs[index] if glyph then local gu = glyph.unicode if not gu then glyph.unicode = unicode nofdone = nofdone + 1 elseif gu ~= unicode then -- report("format 6 overloading %C to %C",gu,unicode) -- glyph.unicode = unicode -- no duplicates ... weird side effects in lm end if not mapping[index] then mapping[index] = unicode end end end end return nofdone end formatreaders[12] = function(f,fontdata,offset) setposition(f,offset+2+2+4+4) -- skip format reserved length language local mapping = fontdata.mapping local glyphs = fontdata.glyphs local duplicates = fontdata.duplicates local nofgroups = readulong(f) local nofdone = 0 for i=1,nofgroups do local first = readulong(f) local last = readulong(f) local index = readulong(f) if trace_cmap then report("format 12 from %C to %C starts at index %i",first,last,index) end for unicode=first,last do local glyph = glyphs[index] if glyph then local gu = glyph.unicode if not gu then glyph.unicode = unicode nofdone = nofdone + 1 elseif gu ~= unicode then -- e.g. sourcehan fonts need this local d = duplicates[gu] if d then d[unicode] = true else duplicates[gu] = { [unicode] = true } end end if not mapping[index] then mapping[index] = unicode end end index = index + 1 end end return nofdone end formatreaders[13] = function(f,fontdata,offset) -- -- this fector is only used for simple fallback fonts -- setposition(f,offset+2+2+4+4) -- skip format reserved length language local mapping = fontdata.mapping local glyphs = fontdata.glyphs local duplicates = fontdata.duplicates local nofgroups = readulong(f) local nofdone = 0 for i=1,nofgroups do local first = readulong(f) local last = readulong(f) local index = readulong(f) if first < privateoffset then if trace_cmap then report("format 13 from %C to %C get index %i",first,last,index) end local glyph = glyphs[index] local unicode = glyph.unicode if not unicode then unicode = first glyph.unicode = unicode first = first + 1 end local list = duplicates[unicode] mapping[index] = unicode if not list then list = { } duplicates[unicode] = list end if last >= privateoffset then local limit = privateoffset - 1 report("format 13 from %C to %C pruned to %C",first,last,limit) last = limit end for unicode=first,last do list[unicode] = true end nofdone = nofdone + last - first + 1 else report("format 13 from %C to %C ignored",first,last) end end return nofdone end formatreaders[14] = function(f,fontdata,offset) if offset and offset ~= 0 then setposition(f,offset) local format = readushort(f) local length = readulong(f) local nofrecords = readulong(f) local records = { } local variants = { } local nofdone = 0 fontdata.variants = variants for i=1,nofrecords do records[i] = { selector = readuint(f), default = readulong(f), -- default offset other = readulong(f), -- non-default offset } end for i=1,nofrecords do local record = records[i] local selector = record.selector local default = record.default local other = record.other -- -- there is no need to map the defaults to themselves -- -- if default ~= 0 then -- setposition(f,offset+default) -- local nofranges = readulong(f) -- for i=1,nofranges do -- local start = readuint(f) -- local extra = readbyte(f) -- for i=start,start+extra do -- mapping[i] = i -- end -- end -- end local other = record.other if other ~= 0 then setposition(f,offset+other) local mapping = { } local count = readulong(f) for i=1,count do mapping[readuint(f)] = readushort(f) end nofdone = nofdone + count variants[selector] = mapping end end return nofdone else return 0 end end local function checkcmap(f,fontdata,records,platform,encoding,format) local data = records[platform] if not data then return 0 end data = data[encoding] if not data then return 0 end data = data[format] if not data then return 0 end local reader = formatreaders[format] if not reader then return 0 end local p = platforms[platform] local e = encodings[p] local n = reader(f,fontdata,data) or 0 report("cmap checked: platform %i (%s), encoding %i (%s), format %i, new unicodes %i",platform,p,encoding,e and e[encoding] or "?",format,n) return n end function readers.cmap(f,fontdata,specification) if specification.glyphs then local datatable = fontdata.tables.cmap if datatable then local tableoffset = datatable.offset setposition(f,tableoffset) local version = readushort(f) local noftables = readushort(f) local records = { } local unicodecid = false local variantcid = false local variants = { } local duplicates = fontdata.duplicates or { } fontdata.duplicates = duplicates for i=1,noftables do local platform = readushort(f) local encoding = readushort(f) local offset = readulong(f) local record = records[platform] if not record then records[platform] = { [encoding] = { offsets = { offset }, formats = { }, } } else local subtables = record[encoding] if not subtables then record[encoding] = { offsets = { offset }, formats = { }, } else local offsets = subtables.offsets offsets[#offsets+1] = offset end end end report("found cmaps:") for platform, record in sortedhash(records) do local p = platforms[platform] local e = encodings[p] local sp = supported[platform] local ps = p or "?" if sp then report(" platform %i: %s",platform,ps) else report(" platform %i: %s (unsupported)",platform,ps) end for encoding, subtables in sortedhash(record) do local se = sp and sp[encoding] local es = e and e[encoding] or "?" if se then report(" encoding %i: %s",encoding,es) else report(" encoding %i: %s (unsupported)",encoding,es) end local offsets = subtables.offsets local formats = subtables.formats for i=1,#offsets do local offset = tableoffset + offsets[i] setposition(f,offset) formats[readushort(f)] = offset end record[encoding] = formats local list = sortedkeys(formats) for i=1,#list do if not (se and se[list[i]]) then list[i] = list[i] .. " (unsupported)" end end report(" formats: % t",list) end end -- local ok = false for i=1,#sequence do local sp, se, sf = unpack(sequence[i]) if checkcmap(f,fontdata,records,sp,se,sf) > 0 then ok = true end end if not ok then report("no useable unicode cmap found") end -- fontdata.cidmaps = { version = version, noftables = noftables, records = records, } else fontdata.cidmaps = { } end end end -- The glyf table depends on the loca table. We have one entry to much in the locations table (the -- last one is a dummy) because we need to calculate the size of a glyph blob from the delta, -- although we not need it in our usage (yet). We can remove the locations table when we're done. function readers.loca(f,fontdata,specification) if specification.glyphs then reportskippedtable("loca") end end function readers.glyf(f,fontdata,specification) -- part goes to cff module if specification.glyphs then reportskippedtable("glyf") end end -- Experimental (we need fonts). function readers.colr(f,fontdata,specification) if specification.glyphs then reportskippedtable("colr") end end function readers.cpal(f,fontdata,specification) if specification.glyphs then reportskippedtable("cpal") end end function readers.svg(f,fontdata,specification) if specification.glyphs then reportskippedtable("svg") end end -- Here we have a table that we really need for later processing although a more advanced gpos table -- can also be available. Todo: we need a 'fake' lookup for this (analogue to ff). function readers.kern(f,fontdata,specification) if specification.kerns then local datatable = fontdata.tables.kern if datatable then setposition(f,datatable.offset) local version = readushort(f) local noftables = readushort(f) for i=1,noftables do local version = readushort(f) local length = readushort(f) local coverage = readushort(f) -- bit 8-15 of coverage: format 0 or 2 local format = bit32.rshift(coverage,8) -- is this ok? if format == 0 then local nofpairs = readushort(f) local searchrange = readushort(f) local entryselector = readushort(f) local rangeshift = readushort(f) local kerns = { } local glyphs = fontdata.glyphs for i=1,nofpairs do local left = readushort(f) local right = readushort(f) local kern = readfword(f) local glyph = glyphs[left] local kerns = glyph.kerns if kerns then kerns[right] = kern else glyph.kerns = { [right] = kern } end end elseif format == 2 then report("todo: kern classes") else report("todo: kerns") end end end end end function readers.gdef(f,fontdata,specification) if specification.details then reportskippedtable("gdef") end end function readers.gsub(f,fontdata,specification) if specification.details then reportskippedtable("gsub") end end function readers.gpos(f,fontdata,specification) if specification.details then reportskippedtable("gpos") end end function readers.math(f,fontdata,specification) if specification.glyphs then reportskippedtable("math") end end -- Now comes the loader. The order of reading these matters as we need to know -- some properties in order to read following tables. When details is true we also -- initialize the glyphs data. local function getinfo(maindata,sub,platformnames,rawfamilynames,metricstoo) local fontdata = sub and maindata.subfonts and maindata.subfonts[sub] or maindata local names = fontdata.names local info = nil if names then local metrics = fontdata.windowsmetrics or { } local postscript = fontdata.postscript or { } local fontheader = fontdata.fontheader or { } local cffinfo = fontdata.cffinfo or { } local filename = fontdata.filename local weight = getname(fontdata,"weight") or (cffinfo and cffinfo.weight) or (metrics and metrics.weight) local width = getname(fontdata,"width") or (cffinfo and cffinfo.width ) or (metrics and metrics.width ) local fontname = getname(fontdata,"postscriptname") local fullname = getname(fontdata,"fullname") local family = getname(fontdata,"family") local subfamily = getname(fontdata,"subfamily") local familyname = getname(fontdata,"typographicfamily") local subfamilyname = getname(fontdata,"typographicsubfamily") local compatiblename = getname(fontdata,"compatiblefullname") -- kind of useless if rawfamilynames then -- for PG (for now, as i need to check / adapt context to catch a no-fallback case) else if not familyname then familyname = family end if not subfamilyname then subfamilyname = subfamily end end info = { -- we inherit some inconsistencies/choices from ff subfontindex = fontdata.subfontindex or sub or 0, -- filename = filename, version = getname(fontdata,"version"), -- format = fontdata.format, fontname = fontname, fullname = fullname, -- cfffullname = cff.fullname, family = family, subfamily = subfamily, familyname = familyname, subfamilyname = subfamilyname, compatiblename = compatiblename, weight = weight and lower(weight), width = width and lower(width), pfmweight = metrics.weightclass or 400, -- will become weightclass pfmwidth = metrics.widthclass or 5, -- will become widthclass panosewidth = metrics.panosewidth, panoseweight = metrics.panoseweight, italicangle = postscript.italicangle or 0, units = fontheader.units or 0, designsize = fontdata.designsize, minsize = fontdata.minsize, maxsize = fontdata.maxsize, monospaced = (tonumber(postscript.monospaced or 0) > 0) or metrics.panosewidth == "monospaced", averagewidth = metrics.averagewidth, xheight = metrics.xheight, capheight = metrics.capheight, -- not always present and probably crap ascender = metrics.typoascender, descender = metrics.typodescender, platformnames = platformnames and fontdata.platformnames or nil, } if metricstoo then local keys = { "version", "ascender", "descender", "linegap", -- "caretoffset", "caretsloperise", "caretsloperun", "maxadvancewidth", "maxadvanceheight", "maxextent", -- "metricdataformat", "minbottomsidebearing", "mintopsidebearing", } local h = fontdata.horizontalheader or { } local v = fontdata.verticalheader or { } if h then local th = { } local tv = { } for i=1,#keys do local key = keys[i] th[key] = h[key] or 0 tv[key] = v[key] or 0 end info.horizontalmetrics = th info.verticalmetrics = tv end end elseif n then info = { filename = fontdata.filename, comment = "there is no info for subfont " .. n, } else info = { filename = fontdata.filename, comment = "there is no info", } end -- inspect(info) return info end local function loadtables(f,specification,offset) if offset then setposition(f,offset) end local tables = { } local basename = file.basename(specification.filename) local filesize = specification.filesize local filetime = specification.filetime local fontdata = { -- some can/will go filename = basename, filesize = filesize, filetime = filetime, version = readstring(f,4), noftables = readushort(f), searchrange = readushort(f), -- not needed entryselector = readushort(f), -- not needed rangeshift = readushort(f), -- not needed tables = tables, } for i=1,fontdata.noftables do local tag = lower(stripstring(readstring(f,4))) local checksum = readulong(f) -- not used local offset = readulong(f) local length = readulong(f) if offset + length > filesize then report("bad %a table in file %a",tag,basename) end tables[tag] = { checksum = checksum, offset = offset, length = length, } end if tables.cff then fontdata.format = "opentype" else fontdata.format = "truetype" end return fontdata end local function prepareglyps(fontdata) local glyphs = setmetatableindex(function(t,k) local v = { -- maybe more defaults index = k, } t[k] = v return v end) fontdata.glyphs = glyphs fontdata.mapping = { } end local function readdata(f,offset,specification) local fontdata = loadtables(f,specification,offset) if specification.glyphs then prepareglyps(fontdata) end -- readers["name"](f,fontdata,specification) -- local askedname = specification.askedname if askedname then local fullname = getname(fontdata,"fullname") or "" local cleanname = gsub(askedname,"[^a-zA-Z0-9]","") local foundname = gsub(fullname,"[^a-zA-Z0-9]","") if lower(cleanname) ~= lower(foundname) then return -- keep searching end end -- -- readers["os/2"](f,fontdata,specification) readers["head"](f,fontdata,specification) readers["maxp"](f,fontdata,specification) readers["hhea"](f,fontdata,specification) readers["vhea"](f,fontdata,specification) readers["hmtx"](f,fontdata,specification) readers["vmtx"](f,fontdata,specification) readers["vorg"](f,fontdata,specification) readers["post"](f,fontdata,specification) readers["cff" ](f,fontdata,specification) readers["cmap"](f,fontdata,specification) readers["loca"](f,fontdata,specification) readers["glyf"](f,fontdata,specification) readers["colr"](f,fontdata,specification) readers["cpal"](f,fontdata,specification) readers["svg" ](f,fontdata,specification) readers["kern"](f,fontdata,specification) readers["gdef"](f,fontdata,specification) readers["gsub"](f,fontdata,specification) readers["gpos"](f,fontdata,specification) readers["math"](f,fontdata,specification) -- fontdata.locations = nil fontdata.tables = nil fontdata.cidmaps = nil fontdata.dictionaries = nil -- fontdata.cff = nil return fontdata end local function loadfontdata(specification) local filename = specification.filename local fileattr = lfs.attributes(filename) local filesize = fileattr and fileattr.size or 0 local filetime = fileattr and fileattr.modification or 0 local f = openfile(filename,true) -- zero based if not f then report("unable to open %a",filename) elseif filesize == 0 then report("empty file %a",filename) closefile(f) else specification.filesize = filesize specification.filetime = filetime local version = readstring(f,4) local fontdata = nil if version == "OTTO" or version == "true" or version == "\0\1\0\0" then fontdata = readdata(f,0,specification) elseif version == "ttcf" then local subfont = tonumber(specification.subfont) local offsets = { } local ttcversion = readulong(f) local nofsubfonts = readulong(f) for i=1,nofsubfonts do offsets[i] = readulong(f) end if subfont then -- a number of not if subfont >= 1 and subfont <= nofsubfonts then fontdata = readdata(f,offsets[subfont],specification) else report("no subfont %a in file %a",subfont,filename) end else subfont = specification.subfont if type(subfont) == "string" and subfont ~= "" then specification.askedname = subfont for i=1,nofsubfonts do fontdata = readdata(f,offsets[i],specification) if fontdata then fontdata.subfontindex = i report("subfont named %a has index %a",subfont,i) break end end if not fontdata then report("no subfont named %a",subfont) end else local subfonts = { } fontdata = { filename = filename, filesize = filesize, filetime = filetime, version = version, subfonts = subfonts, ttcversion = ttcversion, nofsubfonts = nofsubfonts, } for i=1,nofsubfonts do subfonts[i] = readdata(f,offsets[i],specification) end end end else report("unknown version %a in file %a",version,filename) end closefile(f) return fontdata or { } end end local function loadfont(specification,n) if type(specification) == "string" then specification = { filename = specification, info = true, -- always true (for now) details = true, glyphs = true, shapes = true, kerns = true, globalkerns = true, lookups = true, -- true or number: subfont = n or true, tounicode = false, } end -- if shapes only then if specification.shapes or specification.lookups or specification.kerns then specification.glyphs = true end if specification.glyphs then specification.details = true end if specification.details then specification.info = true -- not really used any more end if specification.platformnames then specification.platformnames = true -- not really used any more end local function message(str) report("fatal error in file %a: %s\n%s",specification.filename,str,debug.traceback()) end local ok, result = xpcall(loadfontdata,message,specification) if ok then return result end end -- we need even less, but we can have a 'detail' variant function readers.loadshapes(filename,n) local fontdata = loadfont { filename = filename, shapes = true, subfont = n, } if fontdata then -- easier on luajit but still we can hit the 64 K stack constants issue for k, v in next, fontdata.glyphs do v.class = nil v.index = nil v.math = nil -- v.name = nil end end return fontdata and { -- version = 0.123 -- todo filename = filename, format = fontdata.format, glyphs = fontdata.glyphs, units = fontdata.fontheader.units, } or { filename = filename, format = "unknown", glyphs = { }, units = 0, } end function readers.loadfont(filename,n) local fontdata = loadfont { filename = filename, glyphs = true, shapes = false, lookups = true, -- kerns = true, -- globalkerns = true, -- only for testing, e.g. cambria has different gpos and kern subfont = n, } if fontdata then -- return { tableversion = tableversion, creator = "context mkiv", size = fontdata.filesize, time = fontdata.filetime, glyphs = fontdata.glyphs, descriptions = fontdata.descriptions, format = fontdata.format, goodies = { }, metadata = getinfo(fontdata,n,false,false,true), -- no platformnames here ! properties = { hasitalics = fontdata.hasitalics or false, maxcolorclass = fontdata.maxcolorclass, hascolor = fontdata.hascolor or false, }, resources = { -- filename = fontdata.filename, filename = filename, private = privateoffset, duplicates = fontdata.duplicates or { }, features = fontdata.features or { }, -- we need to add these in the loader sublookups = fontdata.sublookups or { }, -- we need to add these in the loader marks = fontdata.marks or { }, -- we need to add these in the loader markclasses = fontdata.markclasses or { }, -- we need to add these in the loader marksets = fontdata.marksets or { }, -- we need to add these in the loader sequences = fontdata.sequences or { }, -- we need to add these in the loader variants = fontdata.variants, -- variant -> unicode -> glyph version = getname(fontdata,"version"), cidinfo = fontdata.cidinfo, mathconstants = fontdata.mathconstants, colorpalettes = fontdata.colorpalettes, svgshapes = fontdata.svgshapes, }, } end end function readers.getinfo(filename,specification) -- string, nil|number|table -- platformnames is optional and not used by context (a too unpredictable mess -- that only add to the confusion) .. so it's only for checking things local subfont = nil local platformnames = false local rawfamilynames = false if type(specification) == "table" then subfont = tonumber(specification.subfont) platformnames = specification.platformnames rawfamilynames = specification.rawfamilynames else subfont = tonumber(specification) end local fontdata = loadfont { filename = filename, details = true, platformnames = platformnames, -- rawfamilynames = rawfamilynames, } if fontdata then local subfonts = fontdata.subfonts if not subfonts then return getinfo(fontdata,nil,platformnames,rawfamilynames) elseif not subfont then local info = { } for i=1,#subfonts do info[i] = getinfo(fontdata,i,platformnames,rawfamilynames) end return info elseif subfont >= 1 and subfont <= #subfonts then return getinfo(fontdata,subfont,platformnames,rawfamilynames) else return { filename = filename, comment = "there is no subfont " .. subfont .. " in this file" } end else return { filename = filename, comment = "the file cannot be opened for reading", } end end function readers.rehash(fontdata,hashmethod) report("the %a helper is not yet implemented","rehash") end function readers.checkhash(fontdata) report("the %a helper is not yet implemented","checkhash") end function readers.pack(fontdata,hashmethod) report("the %a helper is not yet implemented","pack") end function readers.unpack(fontdata) report("the %a helper is not yet implemented","unpack") end function readers.expand(fontdata) report("the %a helper is not yet implemented","unpack") end function readers.compact(fontdata) report("the %a helper is not yet implemented","compact") end -- plug in local extenders = { } function readers.registerextender(extender) extenders[#extenders+1] = extender end function readers.extend(fontdata) for i=1,#extenders do local extender = extenders[i] local name = extender.name or "unknown" local action = extender.action if action then action(fontdata) end end end