diff options
| author | Philipp Gesang <phg42.2a@gmail.com> | 2013-04-17 14:53:36 +0200 | 
|---|---|---|
| committer | Philipp Gesang <phg42.2a@gmail.com> | 2013-04-17 14:53:36 +0200 | 
| commit | ac2b107faa5ca91292a3c63fd8b15ca16b817f70 (patch) | |
| tree | 30cb81770fdd135cc3676f6ca2da5e234da0c32d | |
| parent | e9e2d85586078a00e28b8df1c8ccd858c7dc0bf6 (diff) | |
| download | luaotfload-ac2b107faa5ca91292a3c63fd8b15ca16b817f70.tar.gz | |
add approximate matching option  for mkluatexfontdb.lua --find
| -rwxr-xr-x | mkluatexfontdb.lua | 40 | ||||
| -rw-r--r-- | otfl-font-nms.lua | 200 | 
2 files changed, 193 insertions, 47 deletions
diff --git a/mkluatexfontdb.lua b/mkluatexfontdb.lua index d11db53..80c0951 100755 --- a/mkluatexfontdb.lua +++ b/mkluatexfontdb.lua @@ -57,6 +57,8 @@ Valid options:    -h --help                    print this message    --find="font name"           query the database for a font name +  -F --fuzzy                   look for approximate matches if --find fails +    --log=stdout                 redirect log output to stdout  The font database will be saved to @@ -109,7 +111,6 @@ actions.generate = function (job)      logs.names_report("log", 0, "fonts in the database",                        "%i", #fontnames.mappings)      savedname = names.save(fontnames) -    texiowrite_nl""      if savedname then --- FIXME have names.save return bool          return true, true      end @@ -117,22 +118,32 @@ actions.generate = function (job)  end  actions.query = function (job) + +    local query = job.query      local tmpspec = { -        name          = job.query, +        name          = query,          lookup        = "name", -        specification = "name:"..job.query, +        specification = "name:" .. query, +        optsize       = 0,      } -    local foundname, _whatever, success = fonts.names.resolve(nil, nil, tmpspec) + +    local foundname, _whatever, success = +        fonts.names.resolve(nil, nil, tmpspec) +      if success then          logs.names_report(false, 0, -            "resolve", "Font “%s” found!", job.query) +            "resolve", "Font “%s” found!", query)          logs.names_report(false, 0,              "resolve", "Resolved file name “%s”:", foundname)      else          logs.names_report(false, 0, -            "resolve", "Cannot find “%s”.", job.query) +            "resolve", "Cannot find “%s”.", query) +        if job.fuzzy == true then +            logs.names_report(false, 2, +                "resolve", "Looking for close matches, this may take a while ...") +            local success = fonts.names.find_closest(query, job.fuzzy_limit) +        end      end -    texiowrite_nl""      return true, true  end @@ -161,9 +172,11 @@ local process_cmdline = function ( ) -- unit -> jobspec          verbose          = 1  ,          version          = "V",          find             = 1, +        fuzzy            = "F", +        limit            = 1,      } -    local short_options = "fqvVh" +    local short_options = "fFqvVh"      local options, _, optarg =          alt_getopt.get_ordered_opts (arg, short_options, long_options) @@ -198,6 +211,13 @@ local process_cmdline = function ( ) -- unit -> jobspec          elseif v == "find" then              action_pending["query"] = true              result.query = optarg[n] +        elseif v == "F" then +            result.fuzzy = true +        elseif v == "limit" then +            local lim = optarg[n] +            if lim then +                result.fuzzy_limit = tonumber(lim) +            end          end      end      return result @@ -207,7 +227,8 @@ local main = function ( ) -- unit -> int      local retval    = 0      local job       = process_cmdline() -    --inspect(action_pending) +--    inspect(action_pending) +--    inspect(job)      for i=1, #action_sequence do          local actionname = action_sequence[i] @@ -236,6 +257,7 @@ local main = function ( ) -- unit -> int          if exit then break end      end +    texiowrite_nl""      return retval  end diff --git a/otfl-font-nms.lua b/otfl-font-nms.lua index 254519d..e9df548 100644 --- a/otfl-font-nms.lua +++ b/otfl-font-nms.lua @@ -17,6 +17,7 @@ local iolines                 = io.lines  local ioopen                  = io.open  local kpseexpand_path         = kpse.expand_path  local mathabs                 = math.abs +local mathmin                 = math.min  local stringfind              = string.find  local stringformat            = string.format  local stringgmatch            = string.gmatch @@ -24,13 +25,15 @@ local stringgsub              = string.gsub  local stringlower             = string.lower  local stringsub               = string.sub  local stringupper             = string.upper +local tableappend             = table.append   -- TODO get rid of +local tableconcat             = table.concat +local tablecontains           = table.contains -- TODO get rid of +local tablecopy               = table.copy +local tablesort               = table.sort +local tabletofile             = table.tofile  local texiowrite_nl           = texio.write_nl  local utf8gsub                = unicode.utf8.gsub  local utf8lower               = unicode.utf8.lower -local tablecontains           = table.contains -- TODO get rid of -local tableappend             = table.append   -- TODO get rid of -local tabletofile             = table.tofile -local tablecopy               = table.copy  --- these come from Lualibs/Context  local dirglob                 = dir.glob @@ -99,8 +102,6 @@ end  --- When loading a lua file we try its binary complement first, which  --- is assumed to be located at an identical path, carrying the suffix  --- .luc. ---- Furthermore, we memoize loaded files along the way to avoid ---- duplication.  local code_cache = { } @@ -135,13 +136,15 @@ local load_lua_file = function (path)  end  --- define locals in scope +local find_closest +local font_fullinfo  local load_names +local read_fonts_conf +local reload_db +local resolve  local save_names  local scan_external_dir  local update_names -local read_fonts_conf -local resolve -  load_names = function ( )      local path            = filejoin(names.path.dir, names.path.basename) @@ -160,6 +163,8 @@ load_names = function ( )      return data  end +local fuzzy_limit = 1 --- display closest only +  local synonyms = {      regular    = { "normal",        "roman",                     "plain",         "book", @@ -179,8 +184,9 @@ local synonyms = {                     "semibolditalic",                   },  } -local loaded   = false -local reloaded = false +--- state of the database +local fonts_loaded   = false +local fonts_reloaded = false  --[[doc-- @@ -212,8 +218,11 @@ font database created by the mkluatexfontdb script.  --- font  --- 'a -> 'a -> table -> (string * string | bool * bool) ---- note by phg: I added a third return value that indicates a ---- successful lookup as this cannot be inferred from the other values. +--- +---     note by phg: I added a third return value that indicates a +---     successful lookup as this cannot be inferred from the other +---     values. +---  resolve = function (_,_,specification) -- the 1st two parameters are used by ConTeXt      local name  = sanitize_string(specification.name)      local style = sanitize_string(specification.style) or "regular" @@ -225,9 +234,9 @@ resolve = function (_,_,specification) -- the 1st two parameters are used by Con          size = specification.size / 65536      end -    if not loaded then -        names.data = load_names() -        loaded     = true +    if not fonts_loaded then +        names.data   = load_names() +        fonts_loaded = true      end      local data = names.data @@ -235,6 +244,8 @@ resolve = function (_,_,specification) -- the 1st two parameters are used by Con          if data.mappings then              local found = { }              for _,face in next, data.mappings do +                --- TODO we really should store those in dedicated +                --- .sanitized field                  local family    = sanitize_string(face.names and face.names.family)                  local subfamily = sanitize_string(face.names and face.names.subfamily)                  local fullname  = sanitize_string(face.names and face.names.fullname) @@ -333,38 +344,150 @@ resolve = function (_,_,specification) -- the 1st two parameters are used by Con              elseif found.fallback then                  return found.fallback.filename[1], found.fallback.filename[2], true              end -            -- no font found so far -            if not reloaded then -                -- try reloading the database -                names.data = update_names(names.data) -                save_names(names.data) -                reloaded   = true -                return resolve(_,_,specification) +            --- no font found so far +            if not fonts_reloaded then +                --- last straw: try reloading the database +                return reload_db(resolve, nil, nil, specification)              else -                -- else, fallback to requested name -                -- XXX: specification.name is empty with absolute paths, looks -                -- like a bug in the specification parser +                --- else, fallback to requested name +                --- XXX: specification.name is empty with absolute paths, looks +                --- like a bug in the specification parser                  return specification.name, false, false              end          end -    else -        if not reloaded then -            names.data = update_names() -            save_names(names.data) -            reloaded   = true -            return resolve(_,_,specification) -        else + +    else --- no db or outdated; reload names and retry +        if not fonts_reloaded then +            return reload_db(resolve, nil, nil, specification) +        else --- unsucessfully reloaded; bail              return specification.name, false, false          end      end +end --- resolve() + +--- when reload is triggered we update the database +--- and then re-run the caller with the arg list + +--- ('a -> 'a) -> 'a list -> 'a +reload_db = function (caller, ...) +    report("log", 1, "db", "reload initiated") +    names.data = update_names() +    save_names(names.data) +    fonts_reloaded = true +    return caller(...)  end +--- string -> string -> int +local iterative_levenshtein = function (s1, s2) + +  local costs = { } +  local len1, len2 = #s1, #s2 + +  for i = 0, len1 do +    local last = i +    for j = 0, len2 do +      if i == 0 then +        costs[j] = j +      else +        if j > 0 then +          local current = costs[j-1] +          if stringsub(s1, i, i) ~= stringsub(s2, j, j) then +            current = mathmin(current, last, costs[j]) + 1 +          end +          costs[j-1] = last +          last = current +        end +      end +    end +    if i > 0 then costs[len2] = last end +  end + +  return costs[len2]--- lower right has the distance +end + +--- string -> int -> bool +find_closest = function (name, limit) +    local name     = sanitize_string(name) +    limit          = limit or fuzzy_limit + +    if not fonts_loaded then +        names.data = load_names() +        fonts_loaded     = true +    end + +    local data = names.data + +    if type(data) == "table" then +        local by_distance   = { } --- (int, string list) dict +        local distances     = { } --- int list +        local cached        = { } --- (string, int) dict +        local mappings      = data.mappings +        local n_fonts       = #mappings + +        for n = 1, n_fonts do +            local current    = mappings[n] +            local cnames     = current.names +            --[[ +                This is simplistic but surpisingly fast. +                Matching is performed against the “family” name +                of a db record. We then store its “fullname” at +                it edit distance. +                We should probably do some weighting over all the +                font name categories as well as whatever agrep +                does. +            --]] +            if cnames then +                local fullname, family = cnames.fullname, cnames.family +                family = sanitize_string(family) + +                local dist = cached[family]--- maybe already calculated +                if not dist then +                    dist = iterative_levenshtein(name, family) +                    cached[family] = dist +                end +                local namelst = by_distance[dist] +                if not namelst then --- first entry +                    namelst = { fullname } +                    distances[#distances+1] = dist +                else --- append +                    namelst[#namelst+1] = fullname +                end +                by_distance[dist] = namelst +            end +        end + +        --- print the matches according to their distance +        local n_distances = #distances +        if n_distances > 0 then --- got some data +            tablesort(distances) +            limit = mathmin(n_distances, limit) +            report(false, 1, "query", +                    "displaying %d distance levels", limit) + +            for i = 1, limit do +                local dist     = distances[i] +                local namelst  = by_distance[dist] +                report(false, 0, "query", +                    "distance from “" .. name .. "”: " .. dist +                 .. "\n    " .. tableconcat(namelst, "\n    ") +                ) +            end + +            return true +        end +        return false +    else --- need reload +        return reload_db(find_closest, name) +    end +    return false +end --- find_closest() +  --[[doc--  The data inside an Opentype font file can be quite heterogeneous.  Thus in order to get the relevant information, parts of the original  table as returned by the font file reader need to be relocated.  --doc]]-- -local font_fullinfo = function (filename, subfont, texmf) +font_fullinfo = function (filename, subfont, texmf)      local tfmdata = { }      local rawfont = fontloader.open(filename, subfont)      if not rawfont then @@ -825,11 +948,11 @@ end  scan_external_dir = function (dir)      local old_names, new_names -    if loaded then +    if fonts_loaded then          old_names = names.data      else          old_names = load_names() -        loaded    = true +        fonts_loaded    = true      end      new_names = tablecopy(old_names)      scan_dir(dir, old_names, new_names) @@ -842,8 +965,9 @@ names.load   = load_names  names.update = update_names  names.save   = save_names -names.resolve     = resolve --- replace the resolver from luatex-fonts -names.resolvespec = resolve +names.resolve      = resolve --- replace the resolver from luatex-fonts +names.resolvespec  = resolve +names.find_closest = find_closest  --- dummy required by luatex-fonts (cf. luatex-fonts-syn.lua)  | 
