diff options
author | Hans Hagen <pragma@wxs.nl> | 2021-08-07 23:36:31 +0200 |
---|---|---|
committer | Context Git Mirror Bot <phg@phi-gamma.net> | 2021-08-07 23:36:31 +0200 |
commit | 06f5d61e0db05d0803ac5b6b4953937c3b88f1ea (patch) | |
tree | 05aa58e16d6d3fcd93fbe09e426892a86ef9d9d6 | |
parent | ffb7a5f21df9b0802cb00f9ef3578209a372ae2f (diff) | |
download | context-06f5d61e0db05d0803ac5b6b4953937c3b88f1ea.tar.gz |
2021-08-07 22:51:00
25 files changed, 953 insertions, 53 deletions
diff --git a/doc/context/documents/general/manuals/workflows-mkiv.pdf b/doc/context/documents/general/manuals/workflows-mkiv.pdf Binary files differindex b63ecd054..1e5f3f287 100644 --- a/doc/context/documents/general/manuals/workflows-mkiv.pdf +++ b/doc/context/documents/general/manuals/workflows-mkiv.pdf diff --git a/doc/context/sources/general/manuals/workflows/workflows-graphics.tex b/doc/context/sources/general/manuals/workflows/workflows-graphics.tex index a24d293df..2246c1c88 100644 --- a/doc/context/sources/general/manuals/workflows/workflows-graphics.tex +++ b/doc/context/sources/general/manuals/workflows/workflows-graphics.tex @@ -8,12 +8,13 @@ \startsection[title=Bad names] -After many years of using \CONTEXT\ in workflows where large amounts of source files -as well as graphics were involved we can safely say that it's hard for publishers to -control the way these are named. This is probably due to the fact that in a -click|-|and|-|point based desktop publishing workflow names don't matter as one stays on -one machine, and names are only entered once (after that these names become abstractions and -get cut and pasted). Proper consistent resource managament is simply not part of the flow. +After many years of using \CONTEXT\ in workflows where large amounts of source +files as well as graphics were involved we can safely say that it's hard for +publishers to control the way these are named. This is probably due to the fact +that in a click|-|and|-|point based desktop publishing workflow names don't +matter as one stays on one machine, and names are only entered once (after that +these names become abstractions and get cut and pasted). Proper consistent +resource managament is simply not part of the flow. This means that you get names like: @@ -29,19 +30,20 @@ like one. In fancy screen fonts upper and lowercase usage might get obscured. It really makes one wonder if copy|-|editing or adding labels to graphics isn't suffering from the same problem. 
-Anyhow, as in an automated rendering workflow the rendering is often the last step you -can imagine that when names get messed up it's that last step that gets blamed. It's not -that hard to sanitize names of files on disk as well as in the files that refer to them, -and we normally do that we have complete control. This is no option when all the resources -are synchronzied from elsewhere. In that case the only way out is signaling potential -issues. Say that in the source file there is a reference: +Anyhow, as in an automated rendering workflow the rendering is often the last +step you can imagine that when names get messed up it's that last step that gets +blamed. It's not that hard to sanitize names of files on disk as well as in the +files that refer to them, and we normally do that when we have complete control. This +is no option when all the resources are synchronized from elsewhere. In that case +the only way out is signaling potential issues. Say that in the source file there +is a reference: \starttyping foo_Bar_01_03-a.EPS \stoptyping -and that the graphic on disk has the same name, but for some reason after an update -has become: +and that the graphic on disk has the same name, but for some reason after an +update has become: \starttyping foo-Bar_01_03-a.EPS diff --git a/doc/context/sources/general/manuals/workflows/workflows-hashed.tex b/doc/context/sources/general/manuals/workflows/workflows-hashed.tex new file mode 100644 index 000000000..85aa5d5f1 --- /dev/null +++ b/doc/context/sources/general/manuals/workflows/workflows-hashed.tex @@ -0,0 +1,160 @@ +% language=us runpath=texruns:manuals/workflows + +% Musical timestamp: Welcome 2 America by Prince, with a pretty good lineup, +% August 2021. + +\environment workflows-style + +\startcomponent workflows-hashed + +\startchapter[title=Hashed files] + +In a (basically free content) project we had to deal with tens of thousands of +files. 
Most are in \XML\ format, but there are also thousands of \PNG, \JPG\ and +\SVG\ images. In a large project like this, which covers a large part of Dutch +school math, images can be shared. All the content is available for schools as +\HTML\ but can also be turned into printable form and because schools want to +have stable content over specified periods one has to make a regular snapshot of +this corpus. Also, distributing a few gigabytes of data is not much fun. + +So, in order to bring the amount down a dedicated mechanism for handling files +has been introduced. After playing with a \SQLITE\ database we finally settled on +just \LUA, simply because it was faster and it also makes the solution +independent. + +The process comes down to creating a file database once in a while, loading a +relatively small hash mapping at runtime and accessing files from a large +data file on demand. Optionally files can be compressed, which makes sense for +the textual files. + +A database is created with one of the \CONTEXT\ extras, for instance: + +\starttyping +context --extra=hashed --database=m4 --pattern=m4all/**.xml --compress +context --extra=hashed --database=m4 --pattern=m4all/**.svg --compress +context --extra=hashed --database=m4 --pattern=m4all/**.jpg +context --extra=hashed --database=m4 --pattern=m4all/**.png +\stoptyping + +The database uses two files: a small \type {m4.lua} file (some 11 megabytes) and +a large \type {m4.dat} (about 820 megabytes, coming from 1850 megabytes +originals). 
Alternatively you can use a specification, say \type {m4all.lua}: + +\starttyping +return { + { pattern = "m4all/**.xml$", compress = true }, + { pattern = "m4all/**.svg$", compress = true }, + { pattern = "m4all/**.jpg$", compress = false }, + { pattern = "m4all/**.png$", compress = false }, +} +\stoptyping + +\starttyping +context --extra=hashed --database=m4 --patterns=m4all.lua +\stoptyping + +You should see something like this on the console: + +\starttyping +hashed > database 'hasheddata', 1627 paths, 46141 names, + 36935 unique blobs, 29674 compressed blobs +\stoptyping + +So here we share some ten thousand files (all images). In case you wonder why we +keep the duplicates: they have unique names (copies) so that when a section is +updated there is no interference with other sections. The tree structure is +mostly six deep (sometimes there is an additional level). + +% \startluacode +% if not resolvers.finders.helpers.validhashed("hasheddata") then +% resolvers.finders.helpers.createhashed { +% database = "hasheddata", +% pattern = "m4all/**.jpg$", +% compress = false, +% } +% resolvers.finders.helpers.createhashed { +% database = "hasheddata", +% pattern = "m4all/**.png$", +% compress = false, +% } +% resolvers.finders.helpers.createhashed { +% database = "hasheddata", +% pattern = "m4all/**.xml$", +% compress = true, +% } +% end +% \stopluacode + +% \startluacode +% if not resolvers.finders.helpers.validhashed("hasheddata") then +% resolvers.finders.helpers.createhashed { +% database = "hasheddata", +% patterns = { +% { pattern = "m4all/**.jpg$", compress = false }, +% { pattern = "m4all/**.png$", compress = false }, +% { pattern = "m4all/**.svg$", compress = true }, +% { pattern = "m4all/**.xml$", compress = true }, +% }, +% } +% end +% \stopluacode + +Accessing files is the same as with files on the system, but one has to register +a database first: + +\starttyping +\registerhashedfiles[m4] +\stoptyping + +A fully qualified specifier looks like this (not too 
different from other +specifiers): + +\starttyping +\externalfigure + [hashed:///m4all/books/chapters/h3/h3-if1/images/casino.jpg] +\externalfigure + [hashed:///m4all/books/chapters/ha/ha-c4/images/ha-c44-ex2-s1.png] +\stoptyping + +but nicer would be: + +\starttyping +\externalfigure + [m4all/books/chapters/h3/h3-if1/images/casino.jpg] +\externalfigure + [m4all/books/chapters/ha/ha-c4/images/ha-c44-ex2-s1.png] +\stoptyping + +This is possible when we also specify: + +\starttyping +\registerfilescheme[hashed] +\stoptyping + +This makes the given scheme based resolver kick in first, while the normal +file lookup is used as last resort. + +This mechanism is written on top of the infrastructure that has been part of +\CONTEXT\ \MKIV\ right from the start but this particular feature is only +available in \LMTX\ (backporting is likely a waste of time). + +Just for the record: this mechanism is kept simple, so the database has no update +and replace features. One can just generate a new one. You can test for a valid database +and act upon the outcome: + +\starttyping +\doifelsevalidhashedfiles {m4} { + \writestatus{hashed}{using hashed data} + \registerhashedfiles[m4] + \registerfilescheme[hashed] +} { + \writestatus{hashed}{no hashed data} +} +\stoptyping + +Future versions might introduce filename normalization (lowercase, cleanup) so +consider this a first step. First we need to test it for a while. + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/workflows/workflows-injectors.tex b/doc/context/sources/general/manuals/workflows/workflows-injectors.tex index 4b784f7cf..e2d46f060 100644 --- a/doc/context/sources/general/manuals/workflows/workflows-injectors.tex +++ b/doc/context/sources/general/manuals/workflows/workflows-injectors.tex @@ -108,7 +108,7 @@ automated \XML\ workflows where last minute control is needed. 
\stopcomponent -% some day to be described: +% Some day to be described (check Willis tests): % % \showinjector % diff --git a/doc/context/sources/general/manuals/workflows/workflows-mkiv.tex b/doc/context/sources/general/manuals/workflows/workflows-mkiv.tex index 6b8b172b0..1165b621e 100644 --- a/doc/context/sources/general/manuals/workflows/workflows-mkiv.tex +++ b/doc/context/sources/general/manuals/workflows/workflows-mkiv.tex @@ -49,6 +49,7 @@ \component workflows-setups \component workflows-synctex \component workflows-parallel + \component workflows-hashed \stopbodymatter \stopdocument diff --git a/doc/context/sources/general/manuals/workflows/workflows-parallel.tex b/doc/context/sources/general/manuals/workflows/workflows-parallel.tex index a82028de6..006088c2c 100644 --- a/doc/context/sources/general/manuals/workflows/workflows-parallel.tex +++ b/doc/context/sources/general/manuals/workflows/workflows-parallel.tex @@ -88,7 +88,7 @@ provides. Again, caching the input file as above saves a little bit: 10 seconds, so we get 250 seconds. When you run these tests on the machine that you normally work on, waiting for that many jobs to finish is no fun, so what if we (as I then normally -do) watch some music video? With a fullscreen high resolution video shown in the +do) watch some music video? With a full screen high resolution video shown in the foreground the runtime didn't change: still 250 seconds for 1000 jobs with eight parallel runs. On the other hand, a test with Firefox, which is quite demanding, running a video in the background, made the runtime going up by 30 seconds to @@ -118,6 +118,3 @@ is still there, the management script can decide when a next run can be started. 
\stopchapter \stopcomponent - -% downloaded video : Jojo Mayer's 2019 TED talk: https://www.youtube.com/watch?v=Npq-bhz1ll0} -% realtime video : Andrew Cuomo's daily press conference on dealing with Covid 19 diff --git a/doc/context/sources/general/manuals/workflows/workflows-resources.tex b/doc/context/sources/general/manuals/workflows/workflows-resources.tex index 41de6dc35..323bf8209 100644 --- a/doc/context/sources/general/manuals/workflows/workflows-resources.tex +++ b/doc/context/sources/general/manuals/workflows/workflows-resources.tex @@ -6,10 +6,10 @@ \startchapter[title=Accessing resources] -One of the benefits of \TEX\ is that you can use it in automated workflows -where large quantities of data is involved. A document can consist of -several files and normally also includes images. Of course there are styles -involved too. At \PRAGMA\ normally put styles and fonts in: +One of the benefits of \TEX\ is that you can use it in automated workflows where +large quantities of data is involved. A document can consist of several files and +normally also includes images. Of course there are styles involved too. At +\PRAGMA\ normally put styles and fonts in: \starttyping /data/site/context/tex/texmf-project/tex/context/user/<project>/... @@ -37,11 +37,12 @@ The processing happens in: Putting styles (and resources like logos and common images) and fonts (if the project has specific ones not present in the distribution) in the \TEX\ tree makes sense because that is where such files are normally searched. Of course you -need to keep the distributions file database upto|-|date after adding files there. +need to keep the distributions file database upto|-|date after adding files +there. Processing has to happen isolated from other runs so there we use unique -locations. The services responsible for running also deal with regular cleanup -of these temporary files. +locations. The services responsible for running also deal with regular cleanup of +these temporary files. 
Resources are somewhat special. They can be stable, i.e.\ change seldom, but more often they are updated or extended periodically (or even daily). We're not @@ -55,7 +56,7 @@ resource tree. In the 100K case there is a deeper structure which is in itself predictable but because many authors are involved the references to these files are somewhat instable (and undefined). It is surprising to notice that publishers don't care about filenames (read: cannot control all the parties involved) which -means that we have inconsist use of mixed case in filenames, and spaces, +means that we have inconsistent use of mixed case in filenames, and spaces, underscores and dashes creeping in. Because typesetting for paper is always at the end of the pipeline (which nowadays is mostly driven by (limitations) of web products) we need to have a robust and flexible lookup mechanism. It's a side @@ -67,7 +68,7 @@ get it fixed. \footnote {From what we normally receive we often conclude that copy|-|editing and image production companies don't impose any discipline or probably simply lack the tools and methods to control this. Some of our workflows had checkers and fixers, so that when we got 5000 new resources while only a few -needed to be replaced we could filter the right ones. It was not uncommon to find +needed to be replaced we could filter the right ones. It was not uncommon to find duplicates for thousands of pictures: similar or older variants.} \starttyping @@ -151,6 +152,9 @@ When you add, remove or move files the tree, you need to remove the \type {dirlist.*} files in the root because these are used for locating files. A new file will be generated automatically. Don't forget this! +When content doesn't change an alternative discussed in a later chapter can be +considered: hashed databases of files. 
+ \stopchapter \stopcomponent diff --git a/doc/context/sources/general/manuals/workflows/workflows-synctex.tex b/doc/context/sources/general/manuals/workflows/workflows-synctex.tex index bb2128da4..4349461f0 100644 --- a/doc/context/sources/general/manuals/workflows/workflows-synctex.tex +++ b/doc/context/sources/general/manuals/workflows/workflows-synctex.tex @@ -82,10 +82,10 @@ editor} are the following: \stopitemize -It is unavoidable that we get more run time but I assume that for the average user -that is no big deal. It pays off when you have a workflow when a book (or even a -chapter in a book) is generated from hundreds of small \XML\ files. There is no -overhead when \SYNCTEX\ is not used. +It is unavoidable that we get more run time but I assume that for the average +user that is no big deal. It pays off when you have a workflow when a book (or +even a chapter in a book) is generated from hundreds of small \XML\ files. There +is no overhead when \SYNCTEX\ is not used. In \CONTEXT\ we don't use the built|-|in \SYNCTEX\ features, that is: we let filename and line numbers be set but often these are overloaded explicitly. The @@ -120,10 +120,10 @@ A third method is to put this at the top of your file: Often an \XML\ files is very structured and although probably the main body of text is flushed as a stream, specific elements can be flushed out of order. In -educational documents flushing for instance answers to exercises can happen out of -order. In that case we still need to make sure that we go to the right spot in -the file. It will never be 100\% perfect but it's better than nothing. The -above command will also enable \XML\ support. +educational documents flushing for instance answers to exercises can happen out +of order. In that case we still need to make sure that we go to the right spot in +the file. It will never be 100\% perfect but it's better than nothing. The above +command will also enable \XML\ support. 
If you don't want a file to be accessed, you can block it: @@ -131,8 +131,8 @@ If you don't want a file to be accessed, you can block it: \blocksynctexfile[foo.tex] \stoptyping -Of course you need to configure the viewer to respond to the request for -editing. In Sumatra combined with SciTE the magic command is: +Of course you need to configure the viewer to respond to the request for editing. +In Sumatra combined with \SCITE\ the magic command is: \starttyping c:\data\system\scite\wscite\scite.exe "%f" "-goto:%l" @@ -202,6 +202,23 @@ as described here. \stopsection +\startsection[title=Two-way] + +In for instance the \TEX shop editor, there is a two way connection. The nice +thing about this editor is that it is also the first one to use the \type +{mtx-synctex} script to resolve these links, instead of relying on a library. You +can also use this script to inspect a \SYNCTEX\ file yourself. The help info +shows the possible directives. + +\starttyping +mtxrun --script synctex +\stoptyping + +You can resolve positions in the \PDF\ as well as in the sources and list all the +known areas in the log. + +\stopsection + \stopchapter \stopcomponent diff --git a/doc/context/sources/general/manuals/workflows/workflows-xml.tex b/doc/context/sources/general/manuals/workflows/workflows-xml.tex index 93b03bb7b..95a6f71b6 100644 --- a/doc/context/sources/general/manuals/workflows/workflows-xml.tex +++ b/doc/context/sources/general/manuals/workflows/workflows-xml.tex @@ -15,9 +15,9 @@ \startchapter[title={XML}] When you have an \XML\ project with many files involved, finding the right spot -of something that went wrong can be a pain. In one of our project the production of -some 50 books involves 60.000 \XML\ files and 20.000 images. Say that we have the -following file: +of something that went wrong can be a pain. In one of our projects the production +of some 50 books involves 60.000 \XML\ files and 20.000 images. 
\footnote {In the +meantime we could trim this down a lot.} Say that we have the following file: \startbuffer[demo] <?xml version='1.0'?> @@ -32,9 +32,9 @@ following file: \typebuffer[demo] -Before we process this file we will merge the content of the files defined -as includes into it. When this happens the filename is automatically -registered so it can be accessed later. +Before we process this file we will merge the content of the files defined as +includes into it. When this happens the filename is automatically registered so +it can be accessed later. \startbuffer \startxmlsetups xml:initialize diff --git a/tex/context/base/mkii/cont-new.mkii b/tex/context/base/mkii/cont-new.mkii index a901e296e..86a342700 100644 --- a/tex/context/base/mkii/cont-new.mkii +++ b/tex/context/base/mkii/cont-new.mkii @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2021.08.06 01:19} +\newcontextversion{2021.08.07 22:49} %D This file is loaded at runtime, thereby providing an %D excellent place for hacks, patches, extensions and new diff --git a/tex/context/base/mkii/context.mkii b/tex/context/base/mkii/context.mkii index 99705f3bd..0fe42e4a1 100644 --- a/tex/context/base/mkii/context.mkii +++ b/tex/context/base/mkii/context.mkii @@ -20,7 +20,7 @@ %D your styles an modules. \edef\contextformat {\jobname} -\edef\contextversion{2021.08.06 01:19} +\edef\contextversion{2021.08.07 22:49} %D For those who want to use this: diff --git a/tex/context/base/mkiv/cont-new.mkiv b/tex/context/base/mkiv/cont-new.mkiv index 9863f8484..b3b71009e 100644 --- a/tex/context/base/mkiv/cont-new.mkiv +++ b/tex/context/base/mkiv/cont-new.mkiv @@ -13,7 +13,7 @@ % \normalend % uncomment this to get the real base runtime -\newcontextversion{2021.08.06 01:19} +\newcontextversion{2021.08.07 22:49} %D This file is loaded at runtime, thereby providing an excellent place for hacks, %D patches, extensions and new features. 
There can be local overloads in cont-loc diff --git a/tex/context/base/mkiv/context.mkiv b/tex/context/base/mkiv/context.mkiv index e984d930d..fb5f34e21 100644 --- a/tex/context/base/mkiv/context.mkiv +++ b/tex/context/base/mkiv/context.mkiv @@ -45,7 +45,7 @@ %D {YYYY.MM.DD HH:MM} format. \edef\contextformat {\jobname} -\edef\contextversion{2021.08.06 01:19} +\edef\contextversion{2021.08.07 22:49} %D Kind of special: diff --git a/tex/context/base/mkiv/mtx-context-hashed.tex b/tex/context/base/mkiv/mtx-context-hashed.tex new file mode 100644 index 000000000..88de10f3f --- /dev/null +++ b/tex/context/base/mkiv/mtx-context-hashed.tex @@ -0,0 +1,136 @@ +%D \module +%D [ file=mtx-context-hashed, +%D version=2009.03.21, +%D title=\CONTEXT\ Extra Trickry, +%D subtitle=Combine Files, +%D author=Hans Hagen, +%D date=\currentdate, +%D copyright={PRAGMA ADE \& \CONTEXT\ Development Team}] +%C +%C This module is part of the \CONTEXT\ macro||package and is +%C therefore copyrighted by \PRAGMA. See mreadme.pdf for +%C details. 
+ +% begin help +% +% usage: context --extra=hashed [options] list-of-files +% +% --database : database to create or extend +% --pattern : file pattern +% --patterns : pattern file +% --compress : apply compression +% +% end help + +% \startluacode +% if not resolvers.finders.helpers.validhashed("hasheddata") then +% resolvers.finders.helpers.createhashed { +% database = "hasheddata", +% pattern = "m4all/**.jpg$", +% compress = false, +% } +% resolvers.finders.helpers.createhashed { +% database = "hasheddata", +% pattern = "m4all/**.png$", +% compress = false, +% } +% resolvers.finders.helpers.createhashed { +% database = "hasheddata", +% pattern = "m4all/**.xml$", +% compress = true, +% } +% end +% \stopluacode + +% \startluacode +% if not resolvers.finders.helpers.validhashed("hasheddata") then +% resolvers.finders.helpers.createhashed { +% database = "hasheddata", +% patterns = { +% { pattern = "m4all/**.jpg$", compress = false }, +% { pattern = "m4all/**.png$", compress = false }, +% { pattern = "m4all/**.xml$", compress = true }, +% }, +% } +% end +% \stopluacode + +% context --extra=hashed --database=hasheddata --pattern=m4all/**.jpg +% context --extra=hashed --database=hasheddata --pattern=m4all/**.png +% context --extra=hashed --database=hasheddata --pattern=m4all/**.xml --compress +% context --extra=hashed --database=hasheddata --pattern=m4all/**.svg --compress + +% -- m4all.lua: +% +% return { +% { pattern = "m4all/**.jpg$", compress = false }, +% { pattern = "m4all/**.png$", compress = false }, +% { pattern = "m4all/**.svg$", compress = true }, +% { pattern = "m4all/**.xml$", compress = true }, +% } +% +% context --extra=hashed --database=hasheddata --patterns=m4all.lua + +% \registerhashedfiles[hasheddata] +% \registerfilescheme[hashed] +% +% \externalfigure[hashed:///m4all/books/chapters/h3/h3-if1/images/highres/casino.jpg] [height=1cm] +% \externalfigure[hashed:///m4all/books/chapters/ha/ha-c4/images/highres/ha-c44-ex2-s1.png][height=1cm] +% 
\externalfigure[m4all/books/chapters/h3/h3-if1/images/highres/casino.jpg] [height=1cm] +% \externalfigure[m4all/books/chapters/ha/ha-c4/images/highres/ha-c44-ex2-s1.png] [height=1cm] + +\input mtx-context-common.tex + +\noheaderandfooterlines \setupbodyfont[tt] + +\starttext + +\startluacode + + if CONTEXTLMTXMODE > 0 then + + local database = document.arguments.database + + if database then + local metadata = false + if tex.systemmodes["first"] then + metadata = resolvers.finders.helpers.createhashed { + database = database, + pattern = document.arguments.pattern, + compress = document.arguments.compress, + patterns = document.arguments.patterns and table.load(document.arguments.patterns), + } + else + metadata = resolvers.finders.helpers.validhashed(database) + if metadata then + metadata = metadata.metadata + end + end + if metadata then + local function show(what) + context.NC() context(what) + context.EQ() context(metadata[what]) + context.NC() context.NR() + end + context.starttitle { title = "database: " .. 
database } + context.starttabulate { "||r|" } + show("nofnames") + show("nofpaths") + show("nofblobs") + show("nofcompressed") + context.stoptabulate() + context.stoptitle() + else + context("something went wrong, invalid database") + end + else + context("something went wrong, no database specified") + end + else + context("you need lmtx") + end + +\stopluacode + +\stoptext + diff --git a/tex/context/base/mkiv/status-files.pdf b/tex/context/base/mkiv/status-files.pdf Binary files differindex 912ec565f..654c04404 100644 --- a/tex/context/base/mkiv/status-files.pdf +++ b/tex/context/base/mkiv/status-files.pdf diff --git a/tex/context/base/mkiv/status-lua.pdf b/tex/context/base/mkiv/status-lua.pdf Binary files differindex 6c3ca2ec2..de0ad44a4 100644 --- a/tex/context/base/mkiv/status-lua.pdf +++ b/tex/context/base/mkiv/status-lua.pdf diff --git a/tex/context/base/mkxl/cont-new.mkxl b/tex/context/base/mkxl/cont-new.mkxl index d7196efb4..c9a545148 100644 --- a/tex/context/base/mkxl/cont-new.mkxl +++ b/tex/context/base/mkxl/cont-new.mkxl @@ -13,7 +13,7 @@ % \normalend % uncomment this to get the real base runtime -\newcontextversion{2021.08.06 01:19} +\newcontextversion{2021.08.07 22:49} %D This file is loaded at runtime, thereby providing an excellent place for hacks, %D patches, extensions and new features. There can be local overloads in cont-loc diff --git a/tex/context/base/mkxl/context.mkxl b/tex/context/base/mkxl/context.mkxl index 75388d7b3..8c2bcc2aa 100644 --- a/tex/context/base/mkxl/context.mkxl +++ b/tex/context/base/mkxl/context.mkxl @@ -29,7 +29,7 @@ %D {YYYY.MM.DD HH:MM} format. 
\immutable\edef\contextformat {\jobname} -\immutable\edef\contextversion{2021.08.06 01:19} +\immutable\edef\contextversion{2021.08.07 22:49} %overloadmode 1 % check frozen / warning %overloadmode 2 % check frozen / error diff --git a/tex/context/base/mkxl/data-fil.lmt b/tex/context/base/mkxl/data-fil.lmt new file mode 100644 index 000000000..bbcc954b2 --- /dev/null +++ b/tex/context/base/mkxl/data-fil.lmt @@ -0,0 +1,144 @@ +if not modules then modules = { } end modules ['data-fil'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local ioopen = io.open +local isdir = lfs.isdir + +local trace_locating = false trackers.register("resolvers.locating", function(v) trace_locating = v end) + +local report_files = logs.reporter("resolvers","files") + +local resolvers = resolvers +local resolveprefix = resolvers.resolve +local findfile = resolvers.findfile +local scanfiles = resolvers.scanfiles +local registerfilehash = resolvers.registerfilehash +local appendhash = resolvers.appendhash + +local loadcachecontent = caches.loadcontent + +local checkgarbage = utilities.garbagecollector and utilities.garbagecollector.check + +function resolvers.locators.file(specification) + local filename = specification.filename + local realname = resolveprefix(filename) -- no shortcut + if realname and realname ~= '' and isdir(realname) then + if trace_locating then + report_files("file locator %a found as %a",filename,realname) + end + appendhash('file',filename,true) -- cache + elseif trace_locating then + report_files("file locator %a not found",filename) + end +end + +function resolvers.hashers.file(specification) + local pathname = specification.filename + local content = loadcachecontent(pathname,'files') + registerfilehash(pathname,content,content==nil) +end + +function resolvers.generators.file(specification) + 
local pathname = specification.filename + local content = scanfiles(pathname,false,true) -- scan once + registerfilehash(pathname,content,true) +end + +resolvers.concatinators.file = file.join + +local finders = resolvers.finders +local notfound = finders.notfound + +function finders.file(specification,filetype) + local filename = specification.filename + local foundname = findfile(filename,filetype) + if foundname and foundname ~= "" then + if trace_locating then + report_files("file finder: %a found",filename) + end + return foundname + else + if trace_locating then + report_files("file finder: %a not found",filename) + end + return notfound() + end +end + +-- The default textopener will be overloaded later on. + +local openers = resolvers.openers +local notfound = openers.notfound +local overloaded = false + +local function textopener(tag,filename,f) + return { + reader = function() return f:read () end, + close = function() return f:close() end, + } +end + +function openers.helpers.textopener(...) + return textopener(...) 
+end + +function openers.helpers.settextopener(opener) + if overloaded then + report_files("file opener: %s overloaded","already") + else + if trace_locating then + report_files("file opener: %s overloaded","once") + end + overloaded = true + textopener = opener + end +end + +function openers.file(specification,filetype) + local filename = specification.filename + if filename and filename ~= "" then + local f = ioopen(filename,"r") + if f then + if trace_locating then + report_files("file opener: %a opened",filename) + end + return textopener("file",filename,f) + end + end + if trace_locating then + report_files("file opener: %a not found",filename) + end + return notfound() +end + +local loaders = resolvers.loaders +local notfound = loaders.notfound + +function loaders.file(specification,filetype) + local filename = specification.filename + if filename and filename ~= "" then + local f = ioopen(filename,"rb") + if f then + if trace_locating then + report_files("file loader: %a loaded",filename) + end + local s = f:read("*a") -- io.readall(f) is faster but we never have large files here + if checkgarbage then + checkgarbage(#s) + end + f:close() + if s then + return true, s, #s + end + end + end + if trace_locating then + report_files("file loader: %a not found",filename) + end + return notfound() +end diff --git a/tex/context/base/mkxl/data-hsh.lmt b/tex/context/base/mkxl/data-hsh.lmt new file mode 100644 index 000000000..0a2d94f81 --- /dev/null +++ b/tex/context/base/mkxl/data-hsh.lmt @@ -0,0 +1,382 @@ +-- only lmt because the backend code doesn't deal with it and it makes +-- no sense to waste time on that for mkiv + +if not modules then modules = { } end modules ['data-hsh'] = { + version = 0.002, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- todo: options +-- +-- lowercase +-- cleanupnames (normalize) +-- 
use database from project tree + +local type = type +local gsub = string.gsub +local addsuffix, basename, pathpart, filesuffix, filesize = file.addsuffix, file.basename, file.pathpart, file.suffix, file.size +local loadtable, savetable = table.load, table.save +local loaddata, savedata, open = io.loaddata, io.savedata, io.open + +local trace_hashed = false +local report_hashed = logs.reporter("resolvers","hashed") + +trackers.register("resolvers.locating", function(v) trace_hashed = v end) +trackers.register("resolvers.hashed", function(v) trace_hashed = v end) + +-- we can have a virtual file: open at the position, make sure read and seek don't +-- go beyond the boundaries + +local resolvers = resolvers +local finders = resolvers.finders +local openers = resolvers.openers +local loaders = resolvers.loaders + +local ordered = { } +local hashed = { } +local version = 0.002 + +-- local lowercase = characters.lower + +local function showstatus(database,metadata) + report_hashed("database %a, %i paths, %i names, %i unique blobs, %i compressed blobs", + database, metadata.nofpaths, metadata.nofnames, metadata.nofblobs, metadata.nofcompressed + ) +end + +-- Returns the registered entry for a database, or a fresh { database, metadata, dataname } table when +-- the lua metadata loads, its version matches and the dat file exists; otherwise reports and returns nothing. + +local function validhashed(database) + local found = hashed[database] + if found then + return found + else + local metaname = addsuffix(database,"lua") + local dataname = addsuffix(database,"dat") + local metadata = loadtable(metaname) + if type(metadata) ~= "table" then + report_hashed("invalid database %a",metaname) + elseif metadata.version ~= version then + report_hashed("version mismatch in database %a",metaname) + elseif not lfs.isfile(dataname) then + report_hashed("missing data file for %a",metaname) + else + return { + database = database, + metadata = metadata, + dataname = dataname, + } + end + end +end + +local function registerhashed(database) + if not hashed[database] then + local valid = validhashed(database) + if valid then + ordered[#ordered + 1] = valid + hashed[database] = ordered[#ordered] + 
showstatus(database,valid.metadata) + end + end +end + +local registerfilescheme do + + local findfile = finders.file + + local list = { } + local done = { } + local hash = { } + + registerfilescheme = function(name) + if not done[name] then + list[#list+1] = name + done[name] = true + end + end + + -- why does the finder not remember ? + + -- The registered schemes are tried in order, then the original file finder; the outcome (also a + -- negative one, stored as false) is remembered per specification.original. + + function finders.file(specification,filetype) + if type(specification) == "table" then + local original = specification.original + -- print(original) + if original then + local found = hash[original] + if found == nil then + for i=1,#list do + local scheme = list[i] + -- a scheme can be registered by name without a finder being defined for it + local finder = finders[scheme] + local found = finder and finder(specification,filetype) + if found then + hash[original] = found + if trace_hashed then + report_hashed("found by auto scheme %s: %s",scheme,found) + end + return found + end + end + local found = findfile(specification,filetype) + if found then + hash[original] = found + if trace_hashed then + report_hashed("found by normal file scheme: %s",found) + end + return found + end + hash[original] = false + elseif found then + return found + end + return false + else + -- something is wrong here, maybe we should trace it (scheme can be "unknown") + end + end + -- again, something is wrong + return findfile(specification,filetype) + end + +end + +finders.helpers.validhashed = validhashed +finders.helpers.registerhashed = registerhashed +finders.helpers.registerfilescheme = registerfilescheme + +local function locate(found,path,name) + local files = found.metadata.files + local hashes = found.metadata.hashes + local fp = files[path] + local hash = fp and fp[name] + if hash and hashes[hash] then + return hash + end +end + +-- Looks a file up in one given database or in all registered ones (in order of registration). + +local function locatehash(filename,database) + if filename then + local name = basename(filename) + local path = pathpart(filename) + local hash = false + if database then + local found = hashed[database] + if found then + -- single target: the former trailing values were evaluated and dropped + hash = locate(found,path,name) + end + else + for i=1,#ordered do + local found = 
ordered[i] + hash = locate(found,path,name) + if hash then + database = found.database + break + end + end + end + if hash then + return { + hash = hash, + name = name, + path = path, + base = database, + } + end + end +end + +-- no caching yet, we don't always want the file and it's fast enough + +-- Returns the (decompressed) content of a file stored in a hashed database, or nothing when the +-- file is unknown or the data file cannot be opened. + +local function locateblob(filename,database) + local found = locatehash(filename,database) + if found then + local database = found.base + local data = hashed[database] + if data then + local metadata = data.metadata + local dataname = data.dataname + local hashes = metadata.hashes + local blobdata = hashes[found.hash] + if blobdata and dataname then + local position = blobdata.position + local f = open(dataname,"rb") + if f then + f:seek("set",position) + local blob = f:read(blobdata.datasize) + -- the blob is in memory now, so release the handle (one is opened per lookup) + f:close() + -- a failed read gives nil, which we don't want to feed into the decompressor + if blob and blobdata.compress == "zip" then + blob = zlib.decompresssize(blob,blobdata.filesize) + end + return blob + end + end + end + end +end + +local finders = resolvers.finders +local notfound = finders.notfound + +function finders.hashed(specification) + local original = specification.original + local fullpath = specification.path + if fullpath then + local found = locatehash(fullpath) + if found then + if trace_hashed then + report_hashed("finder: file %a found",original) + end + return original + end + end + if trace_hashed then + report_hashed("finder: unknown file %a",original) + end + return notfound() +end + +local notfound = openers.notfound +local textopener = openers.helpers.textopener + +function openers.hashed(specification) + local original = specification.original + local fullpath = specification.path + if fullpath then + local found = locateblob(fullpath) + if found then + if trace_hashed then + report_hashed("finder: file %a found",original) + end + return textopener("hashed",original,found,"utf-8") + end + end + if trace_hashed then + report_hashed("finder: unknown file %a",original) + end + return notfound() +end + +local notfound = loaders.notfound + +function 
loaders.hashed(specification) + local original = specification.original + local fullpath = specification.path + if fullpath then + local found = locateblob(fullpath) + if found then + if trace_hashed then + report_hashed("finder: file %a found",original) + end + return true, found, found and #found or 0 + end + end + if trace_hashed then + report_hashed("finder: unknown file %a",original) + end + return notfound() +end + +-- this actually could end up in the generate namespace but it is not +-- really a 'generic' feature, more a module (at least for now) + +local calculatehash = sha2.HEX256 -- md5.HEX is not unique enough + +function resolvers.finders.helpers.createhashed(specification) + local database = specification.database + local patterns = specification.patterns + if not patterns then + local pattern = specification.pattern + if pattern then + patterns = { + { + pattern = pattern, + compress = specification.compress, + } + } + end + end + local datname = addsuffix(database,"dat") + local luaname = addsuffix(database,"lua") + local metadata = loadtable(luaname) + if type(metadata) ~= "table" then + metadata = false + elseif metadata.kind == "hashed" and metadata.version ~= version then + report_hashed("version mismatch, starting with new table") + metadata = false + end + if not metadata then + metadata = { + version = version, + kind = "hashed", + files = { }, + hashes = { }, + nofnames = 0, + nofpaths = 0, + nofblobs = 0, + nofcompressed = 0, + } + end + local files = metadata.files + local hashes = metadata.hashes + local nofpaths = metadata.nofpaths + local nofnames = metadata.nofnames + local nofblobs = metadata.nofblobs + local nofcompressed = metadata.nofcompressed + if type(patterns) == "table" then + for i=1,#patterns do + local pattern = patterns[i].pattern + if pattern then + local compress = patterns[i].compress + local list = dir.glob(pattern) + local total = #list + report_hashed("database %a, adding pattern %a, compression 
%l",database,pattern,compress) + for i=1,total do + local filename = list[i] + local name = basename(filename) + local path = pathpart(filename) + local data = loaddata(filename) + -- cleanup + path = gsub(path,"^[./]*","") + -- + if data then + local fp = files[path] + if not fp then + fp = { } + files[path] = fp + nofpaths = nofpaths + 1 + end + local ff = fp[name] + if not ff then + local hash = calculatehash(data) + if not hashes[hash] then + local size = #data + if compress then + data = zlib.compresssize(data,size) + nofcompressed = nofcompressed + 1 + end + local position = filesize(datname) + savedata(datname,data,"",true) + hashes[hash] = { + filesize = size, + datasize = #data, + compress = compress and "zip", + position = position, + } + nofblobs = nofblobs + 1 + end + fp[name] = hash + nofnames = nofnames + 1 + end + end + end + end + end + end + metadata.nofpaths = nofpaths + metadata.nofnames = nofnames + metadata.nofblobs = nofblobs + metadata.nofcompressed = nofcompressed + savetable(luaname, metadata) + showstatus(database,metadata) + return metadata +end + diff --git a/tex/context/base/mkxl/file-job.lmt b/tex/context/base/mkxl/file-job.lmt index 90aea19e6..dac6f6d9a 100644 --- a/tex/context/base/mkxl/file-job.lmt +++ b/tex/context/base/mkxl/file-job.lmt @@ -10,7 +10,7 @@ if not modules then modules = { } end modules ['file-job'] = { -- and push/poppign at the tex end local next, rawget, tostring, tonumber = next, rawget, tostring, tonumber -local gsub, match, find = string.gsub, string.match, string.find +local gsub, match, gmatch, find = string.gsub, string.match, string.gmatch, string.find local insert, remove, concat = table.insert, table.remove, table.concat local validstring, formatters = string.valid, string.formatters local sortedhash = table.sortedhash @@ -1300,3 +1300,45 @@ implement { -- ctx_doifelse(continue) end } + +-- data-hsh.lmt: + +local helpers = resolvers.finders.helpers +local validhashed = helpers.validhashed +local 
registerhashed = helpers.registerhashed +local registerfilescheme = helpers.registerfilescheme + +implement { + name = "registerhashedfiles", + public = true, + protected = true, + arguments = "optional", + actions = function(list) + for name in gmatch(list,"[^, ]+") do + registerhashed(name) + end + end, +} + +implement { + name = "registerfilescheme", + public = true, + protected = true, + arguments = "optional", + actions = function(list) + for name in gmatch(list,"[^, ]+") do + registerfilescheme(name) + end + end, +} + +implement { + name = "doifelsevalidhashedfiles", + public = true, + protected = true, + arguments = "string", + actions = function(name) + ctx_doifelse(validhashed(name)) + end, +} + diff --git a/tex/context/base/mkxl/file-job.mklx b/tex/context/base/mkxl/file-job.mklx index e39ed17ed..58c6aa3d5 100644 --- a/tex/context/base/mkxl/file-job.mklx +++ b/tex/context/base/mkxl/file-job.mklx @@ -18,6 +18,7 @@ %D This module delegates most of the work to \LUA\ and therefore also let it %D define the commands, which is more efficient. +\registerctxluafile{data-hsh}{autosuffix} \registerctxluafile{file-job}{autosuffix} %D Here are some helpers for processing and path control. 
In the following example diff --git a/tex/context/base/mkxl/font-sym.mklx b/tex/context/base/mkxl/font-sym.mklx index c908ad849..e1de316ef 100644 --- a/tex/context/base/mkxl/font-sym.mklx +++ b/tex/context/base/mkxl/font-sym.mklx @@ -170,6 +170,17 @@ \currentsymbolfont \gletcsname\??symbolfont\askedsymbolfont\endcsname\lastrawfontcall} +% \definefontfeature[colored][colr=yes] +% \definefontsynonym[flags][file:BabelStoneFlagsDual.ttf*colored] +% \definesymbol[BR][{\getnamedglyphdirect {flags}{br}}] +% \definesymbol[PT][{\getnamedglyphdirect {flags}{pt}}] +% \definesymbol[BR][{\getnamedglyphdirectscaled{.7}{flags}{br}}] +% \definesymbol[PT][{\getnamedglyphdirectscaled{.7}{flags}{pt}}] +% +% \def\glyphscaled#1{\cldcontext{math.floor(\the\glyphscale*#1)}\relax} +% \definesymbol[BR][{\glyphscale\glyphscaled{.7}\getnamedglyphdirect{flags}{br}}] +% \definesymbol[PT][{\glyphscale\glyphscaled{.7}\getnamedglyphdirect{flags}{pt}}] + \permanent\protected\def\getnamedglyphstyled#fontname#character{{\setstyledsymbolicfont{#fontname}\clf_fontchar{#character}}} \permanent\protected\def\getnamedglyphdirect#fontname#character{{\setdirectsymbolicfont{#fontname}\clf_fontchar{#character}}} \permanent\protected\def\getglyphstyled #fontname#character{{\setstyledsymbolicfont{#fontname}\doifelsenumber{#character}\char\donothing#character}} @@ -177,6 +188,9 @@ \permanent\protected\def\resolvedglyphstyled#fontname#character{{\setstyledsymbolicfont{#fontname}\clf_tochar{#character}}} \permanent\protected\def\resolvedglyphdirect#fontname#character{{\setdirectsymbolicfont{#fontname}\clf_tochar{#character}}} +\permanent\protected\def\getnamedglyphdirectscaled#scale#fontname#character% + {{\setscaleddirectsymbolicfont\fontbody{#scale}{#fontname}\clf_fontchar{#character}}} + % this one is wrong: \permanent\protected\def\getscaledglyph#scale#name#content% diff --git a/tex/context/base/mkxl/lpdf-tag.lmt b/tex/context/base/mkxl/lpdf-tag.lmt index 3ff058a4e..5b52f56fd 100644 --- 
a/tex/context/base/mkxl/lpdf-tag.lmt +++ b/tex/context/base/mkxl/lpdf-tag.lmt @@ -223,7 +223,7 @@ local function initializepage() if pagenum > lastintree then lastintree = pagenum else - report_tags("beware: page order problem in tree at page %i", pagenum) + -- report_tags("beware: page order problem in tree at page %i", pagenum) end tree[pagenum] = list -- we can flush after done, todo end diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 472079bfb..8985f55de 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : c:/data/develop/context/sources/luatex-fonts-merged.lua -- parent file : c:/data/develop/context/sources/luatex-fonts.lua --- merge date : 2021-08-06 01:19 +-- merge date : 2021-08-07 22:49 do -- begin closure to overcome local limits and interference |