diff options
Diffstat (limited to 'tex/context/base/mkiv/publ-dat.lua')
-rw-r--r-- | tex/context/base/mkiv/publ-dat.lua | 1210 |
1 files changed, 1210 insertions, 0 deletions
-- source: tex/context/base/mkiv/publ-dat.lua (added as a new file in this diff, blob b9c43ac44)

if not modules then modules = { } end modules ['publ-dat'] = {
    version   = 1.001,
    comment   = "this module part of publication support",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- todo: strip the @ in the lpeg instead of on do_definition and do_shortcut
-- todo: store bibroot and bibrootdt
-- todo: dataset = datasets[dataset] => current = datasets[dataset]
-- todo: maybe split this file

--[[ldx--
<p>This is a prelude to integrated bibliography support. This file just loads
bibtex files and converts them to xml so that we can access the content
in a convenient way. Actually handling the data takes place elsewhere.</p>
--ldx]]--

if not characters then
    dofile(resolvers.findfile("char-utf.lua"))
    dofile(resolvers.findfile("char-tex.lua"))
end

local chardata  = characters.data
local lowercase = characters.lower

local lower, find, sub = string.lower, string.find, string.sub
local concat, copy, tohash = table.concat, table.copy, table.tohash
local next, type, rawget = next, type, rawget
local utfchar = utf.char
local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
local textoutf = characters and characters.tex.toutf
local settings_to_hash, settings_to_array = utilities.parsers.settings_to_hash, utilities.parsers.settings_to_array
local formatters = string.formatters
local sortedkeys, sortedhash, keys = table.sortedkeys, table.sortedhash, table.keys
local xmlcollected, xmltext, xmlconvert = xml.collected, xml.text, xml.convert
local setmetatableindex = table.setmetatableindex

-- todo: more allocate

local P, R, S, V, C, Cc, Cs, Ct, Carg, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Carg, lpeg.Cmt, lpeg.Cp

local p_whitespace    = lpegpatterns.whitespace
local p_utf8character = lpegpatterns.utf8character

-- Each tracker toggles its own flag. The duplicates tracker used to assign to
-- 'trace', which made it impossible to switch duplicate reporting off and
-- clobbered the general tracing flag as a side effect.

local trace            = false  trackers.register("publications",            function(v) trace            = v end)
local trace_duplicates = true   trackers.register("publications.duplicates", function(v) trace_duplicates = v end)

local report            = logs.reporter("publications")
local report_duplicates = logs.reporter("publications","duplicates")

local allocate = utilities.storage.allocate

local commands  = commands
local implement = interfaces and interfaces.implement

-- The namespace tables below are reused when they already exist so that this
-- file can be loaded on top of an earlier definition.

publications       = publications or { }
local publications = publications

local datasets        = publications.datasets or { }
publications.datasets = datasets

local writers        = publications.writers or { }
publications.writers = writers

local tables        = publications.tables or { }
publications.tables = tables

publications.statistics = publications.statistics or { }
local publicationsstats = publications.statistics

local loaders        = publications.loaders or { }
publications.loaders = loaders

local casters        = { }
publications.casters = casters

-- local sorters        = { }
-- publications.sorters = sorters
--
-- local indexers        = { }
-- publications.indexers = indexers

local components        = { }
publications.components = components -- register components

local enhancers        = publications.enhancers or { }
publications.enhancers = enhancers

local enhancer        = publications.enhancer or utilities.sequencers.new { arguments = "dataset" }
publications.enhancer = enhancer

utilities.sequencers.appendgroup(enhancer,"system") -- private

publicationsstats.nofbytes       = 0
publicationsstats.nofdefinitions = 0
publicationsstats.nofshortcuts   = 0
publicationsstats.nofdatasets    = 0

-- fields that are skipped by the bib and lua savers

local privates = allocate {
    category      = true,
    tag           = true,
    index         = true,
    suffix        = true,
    specification = true,
}

local specials = allocate {
    key      = true,
    crossref = true,
    keywords = true,
    language = true,
    comment  = true,
}

-- fallback origin for fields that a category does not list itself (see checkfield)

local implicits = allocate {
    category = "implicit",
    tag      = "implicit",
    key      = "implicit",
    keywords = "implicit",
    language = "implicit",
    crossref = "implicit",
}

local origins = allocate {
    "optional",
    "extra",
    "required",
    "virtual",
}

local virtuals = allocate {
    "authoryear",
    "authoryears",
    "authornum",
    "num",
    "suffix",
}

local defaulttypes = allocate {
    author    = "author",
    editor    = "author",
    publisher = "author",
    page      = "pagenumber",
    pages     = "pagenumber",
    keywords  = "keyword",
    doi       = "url",
    url       = "url",
}

local defaultsets = allocate {
    page = { "page", "pages" },
}

tables.implicits = implicits
tables.origins   = origins
tables.virtuals  = virtuals
tables.types     = defaulttypes
tables.sets      = defaultsets
tables.privates  = privates
tables.specials  = specials

local variables = interfaces and interfaces.variables or setmetatableindex("self")

local v_all     = variables.all
local v_default = variables.default

if not publications.usedentries then
    function publications.usedentries()
        return { }
    end
end

local xmlplaceholder = "<?xml version='1.0' standalone='yes'?>\n<bibtex></bibtex>"

local defaultshortcuts = allocate {
    jan =  "1",
    feb =  "2",
    mar =  "3",
    apr =  "4",
    may =  "5",
    jun =  "6",
    jul =  "7",
    aug =  "8",
    sep =  "9",
    oct = "10",
    nov = "11",
    dec = "12",
}

-- combined tags look like "first+second+third" (see publications.parenttag)

local space      = p_whitespace^0
local separator  = space * "+" * space
local l_splitter = lpeg.tsplitat(separator)
local d_splitter = lpeg.splitat (separator)

-- The three 'unknown' functions are __index handlers: they store a default
-- under the requested key and return it, so each miss happens only once.

local unknownfield = function(t,k)
    local v = "extra"
    t[k] = v
    return v
end

local unknowncategory = function(t,k)
    local v = {
        required = false,
        optional = false,
        virtual  = false,
        fields   = setmetatableindex(unknownfield), -- this will remember them
        -- NOTE(review): 'unknowntypes' is not defined anywhere in the visible
        -- source, so this field evaluates to nil; the intended value is unclear
        -- (the local 'unknowntype' handler is only defined below) -- confirm
        types    = unknowntypes,
        sets     = setmetatableindex(defaultsets),  -- new, but rather small
    }
    t[k] = v
    return v
end

local unknowntype = function(t,k)
    local v = "string"
    t[k] = v
    return v
end

-- the specification that is returned when no (valid) definition file is found

local default = {
    -- NOTE(review): no 'name' variable is in scope at this point, so this
    -- field is nil unless a global with that name happens to exist -- confirm
    name       = name,
    version    = "1.00",
    comment    = "unknown specification.",
    author     = "anonymous",
    copyright  = "no one",
    categories = setmetatableindex(unknowncategory),
    types      = setmetatableindex(defaulttypes,unknowntype),
}

-- maybe at some point we can have a handlers table with per field
-- a found, fetch, ... method

-- Builds data.fields, a hash that maps each field name listed under one of the
-- origins ("optional", "extra", ...) onto that origin. A listed name that is a
-- key in data.sets is expanded to the members of that set. Origins that are
-- absent (or false) in data are initialized to an empty table. The data table
-- is changed in place and returned; the specification argument is not used.

local function checkfield(specification,category,data)
    local list    = setmetatableindex({},implicits)
    data.fields   = list
    data.category = category
    local sets    = data.sets or { }
    for i=1,#origins do
        local t = origins[i]
        local d = data[t]
        if d then
            for j=1,#d do
                local dj = d[j]
                dj = sets[dj] or dj
                if type(dj) == "table" then
                    for k=1,#dj do
                        list[dj[k]] = t
                    end
                else
                    list[dj] = t
                end
            end
        else
            data[t] = { }
        end
    end
    return data
end

-- Specifications are loaded on demand from "publ-imp-<name>.lua" and cached
-- under their name. When the file is missing or invalid the default is
-- returned and nothing is cached.

local specifications = setmetatableindex(function(t,name)
    if not name then
        return default -- initializer
    end
    local filename = formatters["publ-imp-%s.lua"](name)
    local fullname = resolvers.findfile(filename) or ""
    if fullname == "" then
        report("no data definition file %a for %a",filename,name)
        return default
    end
    local specification = table.load(fullname)
    if not specification then
        report("invalid data definition file %a for %a",fullname,name)
        return default
    end
    --
    local categories = specification.categories
    if not categories then
        categories = { }
        specification.categories = categories
    end
    setmetatableindex(categories,unknowncategory)
    --
    local types = specification.types
    if not types then
        types = defaulttypes
        specification.types = types
    end
    setmetatableindex(types,unknowntype)
    --
    local fields = setmetatableindex(unknownfield)
    specification.fields = fields
    --
    local virtual = specification.virtual
    if virtual == nil then -- so false is valid
        virtual = { }
    elseif virtual == false then
        virtual = { }
    elseif type(virtual) ~= "table" then
        -- This test used to compare against the 'table' library instead of the
        -- string "table"; that is always unequal, so a list provided by the
        -- specification was silently replaced by the defaults.
        virtual = virtuals
    end
    specification.virtual       = virtual
    specification.virtualfields = tohash(virtual)
    --
    for category, data in next, categories do
        categories[category] = checkfield(specification,category,copy(data)) -- we make sure we have no clones
    end
    --
    t[name] = specification
    --
    return specification
end)

publications.specifications = specifications

-- Registers (or replaces) one category in the specification named by target.

function publications.setcategory(target,category,data)
    local specification = specifications[target]
    specification.categories[category] = checkfield(specification,category,data)
end

-- Resolves a combined tag "parent+child+..." for the given dataset. When the
-- parent is present in luadata the children that are also present are added
-- once to the parent's detail.children, get detail.parent set (unless they
-- already have one) and the parent tag is returned. In all other cases the
-- tag itself (or "" when there is none) is returned.

function publications.parenttag(dataset,tag)
    if not dataset or not tag then
        report("error in specification, dataset %a, tag %a",dataset,tag)
    elseif find(tag,"%+") then
        local tags    = lpegmatch(l_splitter,tag)
        local parent  = tags[1]
        local current = datasets[dataset]
        local luadata = current.luadata
        local details = current.details
        local first   = luadata[parent]
        if first then
            local detail   = details[parent]
            local children = detail.children
            if not children then
                children = { }
                detail.children = children
            end
            -- add new ones but only once
            for i=2,#tags do
                local tag = tags[i]
                for j=1,#children do
                    if children[j] == tag then
                        tag = false
                    end
                end
                if tag then
                    local entry = luadata[tag]
                    if entry then
                        local detail = details[tag]
                        children[#children+1] = tag
                        if detail.parent then
                            report("error in combination, dataset %a, tag %a, parent %a, ignored %a",dataset,tag,detail.parent,parent)
                        else
                            report("combining, dataset %a, tag %a, parent %a",dataset,tag,parent)
                            detail.parent = parent
                        end
                    end
                end
            end
            return parent
        end
    end
    return tag or ""
end

-- Creates a fresh, empty dataset table. Without a name the dataset is called
-- "dataset <n>" with n the running dataset count.

function publications.new(name)
    publicationsstats.nofdatasets = publicationsstats.nofdatasets + 1
    local dataset = {
        name       = name or "dataset " .. publicationsstats.nofdatasets,
        nofentries = 0,
        shortcuts  = { },
        luadata    = { },
        suffixes   = { },
        xmldata    = xmlconvert(xmlplaceholder),
        details    = { },
        ordered    = { },
        nofbytes   = 0,
        entries    = nil, -- empty == all
        sources    = { },
        loaded     = { },
        fields     = { },
        userdata   = { },
        used       = { },
        commands   = { }, -- for statistical purposes
        citestate  = { },
        status     = {
            resources = false,
            userdata  = false,
        },
        specifications = {
            -- used specifications
        },
        suffixed   = false,
    }
    -- we delay details till we need it (maybe we just delay the
    -- individual fields but that is tricky as there can be some
    -- dependencies)
    return dataset
end

-- Indexing datasets with a table returns that table, any other unknown key
-- creates and stores a new dataset with that name.

setmetatableindex(datasets,function(t,k)
    if type(k) == "table" then
        return k -- so we can use this accessor as checker
    else
        local v = publications.new(k)
        datasets[k] = v
        return v
    end
end)

-- Returns the index of the entry with the given tag. A known entry keeps its
-- index (0 when it has none) and that index is stored under the tag in
-- dataset.ordered; an unknown tag gets the next free index and the tag is
-- stored under that index in dataset.ordered.

local function getindex(dataset,luadata,tag)
    local found = luadata[tag]
    if found then
        local index = found.index or 0
        dataset.ordered[tag] = index
        return index
    else
        local index = dataset.nofentries + 1
        dataset.nofentries = index
        dataset.ordered[index] = tag
        return index
    end
end

publications.getindex = getindex

do

    -- we apply some normalization

    local space     = S(" \t\n\r\f") -- / " "
    local collapsed = space^1/" "
    local csletter  = lpegpatterns.csletter or R("az","AZ")

    ----- command   = P("\\") * Cc("btxcmd{") * (R("az","AZ")^1) * Cc("}")
    ----- command   = P("\\") * (Carg(1) * C(R("az","AZ")^1) / function(list,c) list[c] = (list[c] or 0) + 1 return "btxcmd{" .. c .. "}" end)
    ----- command   = P("\\") * (Carg(1) * C(R("az","AZ")^1) * space^0 / function(list,c) list[c] = (list[c] or 0) + 1 return "btxcmd{" .. c .. "}" end)

    -- \name becomes \btxcmd{name}; each use is counted in the table that is
    -- passed as extra argument to the match (dataset.commands in do_definition)

    local command   = P("\\") * (Carg(1) * C(csletter^1) * space^0 / function(list,c) list[c] = (list[c] or 0) + 1 return "btxcmd{" .. c .. "}" end)
    local whatever  = P("\\") * P(" ")^1 / " "
                    + P("\\") * ( P("hbox") + P("raise") ) -- bah
    local somemath  = P("$") * ((1-P("$"))^1) * P("$") -- let's not assume nested math
    ----- character = lpegpatterns.utf8character
    local any       = P(1)
    local done      = P(-1)
 -- local one_l     = P("{")  / ""
 -- local one_r     = P("}")  / ""
 -- local two_l     = P("{{") / ""
 -- local two_r     = P("}}") / ""
    local zero_l_r  = P("{}") / "" * #P(1)
    local special   = P("#")  / "\\letterhash "

    -- filter_1 is a cheap test for the presence of a character that needs
    -- treatment, filter_2 does the actual rewriting

    local filter_0  = S('\\{}#')
    local filter_1  = (1-filter_0)^0 * filter_0
    local filter_2  = Cs(
     -- {{...}} ... {{...}}
     -- two_l * (command + special + any - two_r - done)^0 * two_r * done +
     -- one_l * (command + special + any - one_r - done)^0 * one_r * done +
        (
            somemath +
            whatever +
            command +
            special +
            collapsed +
            zero_l_r +
            any
        )^0
    )

    -- Currently we expand shortcuts and for large ones (like the acknowledgements
    -- in tugboat.bib) this is not that efficient. However, eventually strings get
    -- hashed again.

    local function do_shortcut(key,value,dataset)
        publicationsstats.nofshortcuts = publicationsstats.nofshortcuts + 1
        dataset.shortcuts[key] = value
    end

    -- todo: categories : metatable that lowers and also counts
    -- todo: fields     : metatable that lowers

    -- per tag a table with, per dataset name, the number of duplicates seen

    local tags = table.setmetatableindex("table")

    -- Stores one parsed entry in dataset.luadata. The tab argument is a flat
    -- list of alternating field names and values. A tag that is already taken
    -- is stored under "<tag>-<n>" (the entry itself keeps the original tag).
    -- Field names are lowercased, the first occurrence of a field wins, and a
    -- crossref to an already loaded entry makes that entry the fallback for
    -- fields that are not set here.

    local function do_definition(category,tag,tab,dataset)
        publicationsstats.nofdefinitions = publicationsstats.nofdefinitions + 1
        if tag == "" then
            tag = "no-tag-set"
        end
        local fields  = dataset.fields
        local luadata = dataset.luadata
        local hashtag = tag
        if luadata[tag] then
            local t = tags[tag]
            local d = dataset.name
            local n = (t[d] or 0) + 1
            t[d] = n
            hashtag = tag .. "-" .. n
            if trace_duplicates then
                local p = { }
                for k, v in sortedhash(t) do
                    p[#p+1] = formatters["%s:%s"](k,v)
                end
                report_duplicates("tag %a is present multiple times: % t, assigning hashtag %a",tag,p,hashtag)
            end
        end
        local index   = getindex(dataset,luadata,hashtag)
        local entries = {
            category = lower(category),
            tag      = tag,
            index    = index,
        }
        for i=1,#tab,2 do
            local original   = tab[i]
            local normalized = fields[original]
            if not normalized then
                normalized = lower(original) -- we assume ascii fields
                fields[original] = normalized
            end
         -- if entries[normalized] then
            if rawget(entries,normalized) then -- raw, so that a crossref parent doesn't count
                if trace_duplicates then
                    report_duplicates("redundant field %a is ignored for tag %a in dataset %a",normalized,tag,dataset.name)
                end
            else
                local value = tab[i+1]
                value = textoutf(value)
                if lpegmatch(filter_1,value) then
                    value = lpegmatch(filter_2,value,1,dataset.commands) -- we need to start at 1 for { }
                end
                if normalized == "crossref" then
                    local parent = luadata[value]
                    if parent then
                        setmetatableindex(entries,parent)
                    else
                        -- warning
                    end
                end
                entries[normalized] = value
            end
        end
        luadata[hashtag] = entries
    end

    -- an unquoted value is a shortcut of the dataset, a default shortcut or taken as is

    local function resolve(s,dataset)
        return dataset.shortcuts[s] or defaultshortcuts[s] or s -- can be number
    end

    -- matches "<message|warning|error|comment> : <text>"

    local pattern = p_whitespace^0
                  * C(P("message") + P("warning") + P("error") + P("comment")) * p_whitespace^0 * P(":")
                  * p_whitespace^0
                  * C(P(1)^1)

    -- reports the lines of a @comment that matches the pattern above

    local function do_comment(s,dataset)
        local how, what = lpegmatch(pattern,s)
        if how and what then
            local t = string.splitlines(utilities.strings.striplines(what))
            local b = file.basename(dataset.fullname or dataset.name or "unset")
            for i=1,#t do
                report("%s > %s : %s",b,how,t[i])
            end
        end
    end

    local percent    = P("%")
    local start      = P("@")
    local comma      = P(",")
    local hash       = P("#")
    local escape     = P("\\")
    local single     = P("'")
    local double     = P('"')
    local left       = P('{')
    local right      = P('}')
local both = left + right + local lineending = S("\n\r") + local space = S(" \t\n\r\f") -- / " " + local spacing = space^0 + local equal = P("=") + ----- collapsed = (space^1)/ " " + local collapsed = p_whitespace^1/" " + local nospaces = p_whitespace^1/"" + + local p_left = (p_whitespace^0 * left) / "" + local p_right = (right * p_whitespace^0) / "" + + local balanced = P { + [1] = ((escape * (left+right)) + (collapsed + 1 - (left+right))^1 + V(2))^0, + [2] = left * V(1) * right, + } + + -- local unbalanced = P { + -- [1] = left * V(2) * right, + -- [2] = ((escape * (left+right)) + (collapsed + 1 - (left+right))^1 + V(1))^0, + -- } + + local unbalanced = (left/"") * balanced * (right/"") * P(-1) + + local keyword = C((R("az","AZ","09") + S("@_:-"))^1) + local key = C((1-space-equal)^1) + local tag = C((1-space-comma)^0) + local reference = keyword + local category = C((1-space-left)^1) + local s_quoted = ((escape*single) + collapsed + (1-single))^0 + local d_quoted = ((escape*double) + collapsed + (1-double))^0 + + local b_value = p_left * balanced * p_right + -- local u_value = p_left * unbalanced * p_right -- get rid of outer { } + -- local s_value = (single/"") * (u_value + s_quoted) * (single/"") + -- local d_value = (double/"") * (u_value + d_quoted) * (double/"") + local s_value = (single/"") * (unbalanced + s_quoted) * (single/"") + local d_value = (double/"") * (unbalanced + d_quoted) * (double/"") + local r_value = reference * Carg(1) /resolve + + local somevalue = d_value + b_value + s_value + r_value + local value = Cs((somevalue * ((spacing * hash * spacing)/"" * somevalue)^0)) + + value = value / function(s) return lpegmatch(lpegpatterns.stripper,s) end + + local forget = percent^1 * (1-lineending)^0 + local spacing = spacing * forget^0 * spacing + local assignment = spacing * key * spacing * equal * spacing * value * spacing + local definition = category * spacing * left * spacing * tag * spacing * comma * Ct((assignment * comma^0)^0) * spacing * 
right * Carg(1) / do_definition + + local crapword = C((1-space-left)^1) + local shortcut = Cmt(crapword,function(_,p,s) return lower(s) == "string" and p end) * spacing * left * ((assignment * Carg(1))/do_shortcut * comma^0)^0 * spacing * right + local comment = Cmt(crapword,function(_,p,s) return lower(s) == "comment" and p end) * spacing * lpegpatterns.argument * Carg(1) / do_comment + + local casecrap = #S("sScC") * (shortcut + comment) + + local bibtotable = (space + forget + P("@") * (casecrap + definition) + 1)^0 + + -- todo \% + + -- loadbibdata -> dataset.luadata + -- loadtexdata -> dataset.luadata + -- loadluadata -> dataset.luadata + + -- converttoxml -> dataset.xmldata from dataset.luadata + + function publications.loadbibdata(dataset,content,source,kind) + if not source then + report("invalid source for dataset %a",dataset) + return + end + local current = datasets[dataset] + local size = #content + if size == 0 then + report("empty source %a for dataset %a",source,current.name) + else + report("adding bib data to set %a from source %a",current.name,source) + end + statistics.starttiming(publications) + publicationsstats.nofbytes = publicationsstats.nofbytes + size + current.nofbytes = current.nofbytes + size + if source then + table.insert(current.sources, { filename = source, checksum = md5.HEX(content) }) + current.loaded[source] = kind or true + end + current.newtags = #current.luadata > 0 and { } or current.newtags + lpegmatch(bibtotable,content or "",1,current) + statistics.stoptiming(publications) + end + +end + +do + + -- we could use xmlescape again + + local cleaner_0 = S('<>&') + local cleaner_1 = (1-cleaner_0)^0 * cleaner_0 + local cleaner_2 = Cs ( ( + P("<") / "<" + + P(">") / ">" + + P("&") / "&" + + P(1) + )^0) + + local compact = false -- can be a directive but then we also need to deal with newlines ... not now + + function publications.converttoxml(dataset,nice,dontstore,usedonly,subset) -- we have fields ! 
+ local current = datasets[dataset] + local luadata = subset or (current and current.luadata) + if luadata then + statistics.starttiming(publications) + -- + local result, r, n = { }, 0, 0 + local usedonly = usedonly and publications.usedentries() + -- + r = r + 1 ; result[r] = "<?xml version='1.0' standalone='yes'?>" + r = r + 1 ; result[r] = "<bibtex>" + -- + if nice then -- will be default + local f_entry_start = formatters[" <entry tag='%s' category='%s' index='%s'>"] + local s_entry_stop = " </entry>" + local f_field = formatters[" <field name='%s'>%s</field>"] + for tag, entry in sortedhash(luadata) do + if not usedonly or usedonly[tag] then + r = r + 1 ; result[r] = f_entry_start(tag,entry.category,entry.index) + for key, value in sortedhash(entry) do + if key ~= "tag" and key ~= "category" and key ~= "index" then + if lpegmatch(cleaner_1,value) then + value = lpegmatch(cleaner_2,value) + end + if value ~= "" then + r = r + 1 ; result[r] = f_field(key,value) + end + end + end + r = r + 1 ; result[r] = s_entry_stop + n = n + 1 + end + end + else + local f_entry_start = formatters["<entry tag='%s' category='%s' index='%s'>"] + local s_entry_stop = "</entry>" + local f_field = formatters["<field name='%s'>%s</field>"] + for tag, entry in next, luadata do + if not usedonly or usedonly[tag] then + r = r + 1 ; result[r] = f_entry_start(entry.tag,entry.category,entry.index) + for key, value in next, entry do + if key ~= "tag" and key ~= "category" and key ~= "index" then + if lpegmatch(cleaner_1,value) then + value = lpegmatch(cleaner_2,value) + end + if value ~= "" then + r = r + 1 ; result[r] = f_field(key,value) + end + end + end + r = r + 1 ; result[r] = s_entry_stop + n = n + 1 + end + end + end + -- + r = r + 1 ; result[r] = "</bibtex>" + -- + result = concat(result,nice and "\n" or nil) + -- + if dontstore then + -- indeed + else + statistics.starttiming(xml) + current.xmldata = xmlconvert(result, { + resolve_entities = true, + resolve_predefined_entities = 
true, -- in case we have escaped entities + -- unify_predefined_entities = true, -- & -> & + utfize_entities = true, + } ) + statistics.stoptiming(xml) + if lxml then + lxml.register(formatters["btx:%s"](current.name),current.xmldata) + end + end + statistics.stoptiming(publications) + return result, n + end + end + +end + +do + + local function resolvedname(dataset,filename) + local current = datasets[dataset] + if type(filename) ~= "string" then + report("invalid filename %a",tostring(filename)) + end + local fullname = resolvers.findfile(filename,"bib") + if fullname == "" then + fullname = resolvers.findfile(filename) -- let's not be too picky + end + if not fullname or fullname == "" then + report("no file %a",filename) + current.fullname = filename + return current, false + else + current.fullname = fullname + return current, fullname + end + end + + publications.resolvedname = resolvedname + + local cleaner = false + local cleaned = false + + function loaders.registercleaner(what,fullname) + if not fullname or fullname == "" then + report("no %s file %a",what,fullname) + return + end + local list = table.load(fullname) + if not list then + report("invalid %s file %a",what,fullname) + return + end + list = list.replacements + if not list then + report("no replacement table in %a",fullname) + return + end + if cleaned then + report("adding replacements from %a",fullname) + for k, v in next, list do + cleaned[k] = v + end + else + report("using replacements from %a",fullname) + cleaned = list + end + cleaner = true + end + + function loaders.bib(dataset,filename,kind) + local dataset, fullname = resolvedname(dataset,filename) + if not fullname then + return + end + local data = io.loaddata(fullname) or "" + if data == "" then + report("empty file %a, nothing loaded",fullname) + return + end + if cleaner == true then + cleaner = Cs((lpeg.utfchartabletopattern(keys(cleaned)) / cleaned + p_utf8character)^1) + end + if cleaner ~= false then + data = 
lpegmatch(cleaner,data) + end + if trace then + report("loading file %a",fullname) + end + publications.loadbibdata(dataset,data,fullname,kind) + end + + function loaders.lua(dataset,filename) -- if filename is a table we load that one + local current, data, fullname + if type(filename) == "table" then + current = datasets[dataset] + data = filename + else + dataset, fullname = resolvedname(dataset,filename) + if not fullname then + return + end + current = datasets[dataset] + data = table.load(fullname) + end + if data then + local luadata = current.luadata + -- we want the same index each run + for tag, entry in sortedhash(data) do + if type(entry) == "table" then + entry.index = getindex(current,luadata,tag) + entry.tag = tag + luadata[tag] = entry -- no cleaning yet + end + end + end + end + + function loaders.buffer(dataset,name) -- if filename is a table we load that one + local current = datasets[dataset] + local barename = file.removesuffix(name) + local data = buffers.getcontent(barename) or "" + if data == "" then + report("empty buffer %a, nothing loaded",barename) + return + end + if trace then + report("loading buffer",barename) + end + publications.loadbibdata(current,data,barename,"bib") + end + + function loaders.xml(dataset,filename) + local dataset, fullname = resolvedname(dataset,filename) + if not fullname then + return + end + local current = datasets[dataset] + local luadata = current.luadata + local root = xml.load(fullname) + for bibentry in xmlcollected(root,"/bibtex/entry") do + local attributes = bibentry.at + local tag = attributes.tag + local entry = { + category = attributes.category, + tag = tag, -- afterwards also set, to prevent overload + index = 0, -- prelocated + } + for field in xmlcollected(bibentry,"/field") do + entry[field.at.name] = field.dt[1] -- no cleaning yet | xmltext(field) + end + entry.index = getindex(current,luadata,tag) + entry.tag = tag + luadata[tag] = entry + end + end + + 
setmetatableindex(loaders,function(t,filetype) + local v = function(dataset,filename) + report("no loader for file %a with filetype %a",filename,filetype) + end + t[filetype] = v + return v + end) + + local done = setmetatableindex("table") + + function publications.load(specification) + local name = specification.dataset or v_default + local current = datasets[name] + local files = settings_to_array(specification.filename) + local kind = specification.kind + local dataspec = specification.specification + statistics.starttiming(publications) + local somedone = false + for i=1,#files do + local filetype, filename = string.splitup(files[i],"::") + if not filename then + filename = filetype + filetype = file.suffix(filename) + end + if filename then + if not filetype or filetype == "" then + filetype = "bib" + end + if file.suffix(filename) == "" then + file.addsuffix(filename,filetype) + end + if done[current][filename] then + report("file %a is already loaded in dataset %a",filename,name) + else + loaders[filetype](current,filename) + done[current][filename] = true + somedone = true + end + if kind then + current.loaded[current.fullname or filename] = kind + end + if dataspec then + current.specifications[dataspec] = true + end + end + end + if somedone then + local runner = enhancer.runner + if runner then + runner(current) + end + end + statistics.stoptiming(publications) + return current + end + +end + +do + + function enhancers.order(dataset) + local luadata = dataset.luadata + local ordered = dataset.ordered + for i=1,#ordered do + local tag = ordered[i] + if type(tag) == "string" then + ordered[i] = luadata[tag] + end + end + end + + function enhancers.details(dataset) + local luadata = dataset.luadata + local details = dataset.details + for tag, entry in next, luadata do + if not details[tag] then + details[tag] = { } + end + end + end + + utilities.sequencers.appendaction(enhancer,"system","publications.enhancers.order") + 
utilities.sequencers.appendaction(enhancer,"system","publications.enhancers.details") + +end + +do + + local checked = function(s,d) d[s] = (d[s] or 0) + 1 end + local checktex = ( (1-P("\\"))^1 + P("\\") * ((C(R("az","AZ")^1) * Carg(1))/checked))^0 + + function publications.analyze(dataset) + local current = datasets[dataset] + local data = current.luadata + local categories = { } + local fields = { } + local commands = { } + for k, v in next, data do + categories[v.category] = (categories[v.category] or 0) + 1 + for k, v in next, v do + fields[k] = (fields[k] or 0) + 1 + lpegmatch(checktex,v,1,commands) + end + end + current.analysis = { + categories = categories, + fields = fields, + commands = commands, + } + end + +end + +function publications.tags(dataset) + return sortedkeys(datasets[dataset].luadata) +end + +function publications.sortedentries(dataset) + return sortedhash(datasets[dataset].luadata) +end + +-- a helper: + +function publications.concatstate(i,n) + if i == 0 then + return 0 + elseif i == 1 then + return 1 + elseif i == 2 and n == 2 then + return 4 + elseif i == n then + return 3 + else + return 2 + end +end + +-- savers + +do + + local savers = { } + + local s_preamble = [[ +% this is an export from context mkiv + +@preamble{ + \ifdefined\btxcmd + % we're probably in context + \else + \def\btxcmd#1{\csname#1\endcsname} + \fi +} + +]] + + function savers.bib(dataset,filename,tobesaved) + local f_start = formatters["@%s{%s,\n"] + local f_field = formatters[" %s = {%s},\n"] + local s_stop = "}\n\n" + local result = { s_preamble } + local n, r = 0, 1 + for tag, data in sortedhash(tobesaved) do + r = r + 1 ; result[r] = f_start(data.category or "article",tag) + for key, value in sortedhash(data) do + if not privates[key] then + r = r + 1 ; result[r] = f_field(key,value) + end + end + r = r + 1 ; result[r] = s_stop + n = n + 1 + end + report("%s entries from dataset %a saved in %a",n,dataset,filename) + io.savedata(filename,concat(result)) + end + + 
function savers.lua(dataset,filename,tobesaved) + local list = { } + local n = 0 + for tag, data in next, tobesaved do + local t = { } + for key, value in next, data do + if not privates[key] then + d[key] = value + end + end + list[tag] = t + n = n + 1 + end + report("%s entries from dataset %a saved in %a",n,dataset,filename) + table.save(filename,list) + end + + function savers.xml(dataset,filename,tobesaved) + local result, n = publications.converttoxml(dataset,true,true,false,tobesaved) + report("%s entries from dataset %a saved in %a",n,dataset,filename) + io.savedata(filename,result) + end + + function publications.save(specification) + local dataset = specification.dataset + local filename = specification.filename + local filetype = specification.filetype + local criterium = specification.criterium + statistics.starttiming(publications) + if not filename or filename == "" then + report("no filename for saving given") + return + end + if not filetype or filetype == "" then + filetype = file.suffix(filename) + end + if not criterium or criterium == "" then + criterium = v_all + end + local saver = savers[filetype] + if saver then + local current = datasets[dataset] + local luadata = current.luadata or { } + local tobesaved = { } + local result = structures.lists.filter({criterium = criterium, names = "btx"}) or { } + for i=1,#result do + local userdata = result[i].userdata + if userdata then + local set = userdata.btxset or v_default + if set == dataset then + local tag = userdata.btxref + if tag then + tobesaved[tag] = luadata[tag] + end + end + end + end + saver(dataset,filename,tobesaved) + else + report("unknown format %a for saving %a",filetype,dataset) + end + statistics.stoptiming(publications) + return dataset + end + + if implement then + + implement { + name = "btxsavedataset", + actions = publications.save, + arguments = { + { + { "dataset" }, + { "filename" }, + { "filetype" }, + { "criterium" }, + } + } + } + + end + +end + +-- casters + +do + + 
publications.detailed = setmetatableindex(function(detailed,kind) + local values = setmetatableindex(function(values,value) + local caster = casters[kind] + local cast = caster and caster(value) or value + values[value] = cast + return cast + end) + detailed[kind] = values + return values + end) + + local keywordsplitter = utilities.parsers.groupedsplitat(";,") + + casters.keyword = function(str) + return lpegmatch(keywordsplitter,str) + end + + + writers.keyword = function(k) + if type(k) == "table" then + return concat(p,";") + else + return k + end + end + + local pagessplitter = lpeg.splitat(P("-")^1) + + casters.range = function(str) + local first, last = lpegmatch(pagessplitter,str) + return first and last and { first, last } or str + end + + writers.range = function(p) + if type(p) == "table" then + return concat(p,"-") + else + return p + end + end + + casters.pagenumber = casters.range + writers.pagenumber = writers.range + +end |