diff options
author | Context Git Mirror Bot <phg42.2a@gmail.com> | 2016-01-29 20:15:07 +0100 |
---|---|---|
committer | Context Git Mirror Bot <phg42.2a@gmail.com> | 2016-01-29 20:15:07 +0100 |
commit | 397508ee8dca010aec5a9c6951b68434e5657f14 (patch) | |
tree | dae6e16fb8990964ba5261228231696cca3859a1 /doc | |
parent | 452587cdeefbf6e3bf1eee91e4e976f1135b785f (diff) | |
download | context-397508ee8dca010aec5a9c6951b68434e5657f14.tar.gz |
2016-01-29 19:02:00
Diffstat (limited to 'doc')
-rw-r--r-- | doc/context/documents/general/manuals/xml-mkiv.pdf | bin | 0 -> 1307686 bytes | |||
-rw-r--r-- | doc/context/sources/general/manuals/xml/xml-mkiv-01.xml | 15 | ||||
-rw-r--r-- | doc/context/sources/general/manuals/xml/xml-mkiv-02.xml | 15 | ||||
-rw-r--r-- | doc/context/sources/general/manuals/xml/xml-mkiv.tex | 3651 |
4 files changed, 3681 insertions, 0 deletions
diff --git a/doc/context/documents/general/manuals/xml-mkiv.pdf b/doc/context/documents/general/manuals/xml-mkiv.pdf Binary files differnew file mode 100644 index 000000000..5093710d7 --- /dev/null +++ b/doc/context/documents/general/manuals/xml-mkiv.pdf diff --git a/doc/context/sources/general/manuals/xml/xml-mkiv-01.xml b/doc/context/sources/general/manuals/xml/xml-mkiv-01.xml new file mode 100644 index 000000000..c2feac218 --- /dev/null +++ b/doc/context/sources/general/manuals/xml/xml-mkiv-01.xml @@ -0,0 +1,15 @@ +<name>Land Of Dreams</name> +<tracks> + <track length="248">Dixie Flyer</track> + <track length="212">New Orleans Wins The War</track> + <track length="218">Four Eyes</track> + <track length="181">Falling In Love</track> + <track length="187">Something Special</track> + <track length="168">Bad News From Home</track> + <track length="207">Roll With The Punches</track> + <track length="209">Masterman And Baby J</track> + <track length="134">Follow The Flag</track> + <track length="246">I Want You To Hurt Like I Do</track> + <track length="248">It's Money That Matters</track> + <track length="156">Red Bandana</track> +</tracks> diff --git a/doc/context/sources/general/manuals/xml/xml-mkiv-02.xml b/doc/context/sources/general/manuals/xml/xml-mkiv-02.xml new file mode 100644 index 000000000..997123ad6 --- /dev/null +++ b/doc/context/sources/general/manuals/xml/xml-mkiv-02.xml @@ -0,0 +1,15 @@ +<name>Bad Love</name> +<tracks> + <track length="340">My Country</track> + <track length="295">Shame</track> + <track length="205">I'm Dead (But I Don't Know It)</track> + <track length="213">Every Time It Rains</track> + <track length="206">The Great Nations of Europe</track> + <track length="220">The One You Love</track> + <track length="164">The World Isn't Fair</track> + <track length="264">Big Hat, No Cattle</track> + <track length="243">Better Off Dead</track> + <track length="236">I Miss You</track> + <track length="126">Going Home</track> + <track length="180">I Want Everyone To Like Me</track> +</tracks> diff --git a/doc/context/sources/general/manuals/xml/xml-mkiv.tex b/doc/context/sources/general/manuals/xml/xml-mkiv.tex new file mode 100644 index 000000000..87317b69b --- /dev/null +++ b/doc/context/sources/general/manuals/xml/xml-mkiv.tex @@ -0,0 +1,3651 @@ +% language=uk + +% to be checked: +% +% \Ux in index +% +% undocumented: +% +% \processXMLbuffer +% \processxmlbuffer +% \processxmlfile +% +% kind of special ... tricky explanation needed: +% +% \xmldirect + +\input lxml-ctx.mkiv + +\settrue \xmllshowtitle +\setfalse\xmllshowwarning + +\usemodule[set-11] + +\loadsetups[i-en-xml.xml] + +% \definehspace[squad][1em plus .25em minus .25em] + +\usemodule[abr-02] + +\setuplayout + [location=middle, + marking=on, + backspace=20mm, + cutspace=20mm, + topspace=15mm, + header=15mm, + footer=15mm, + height=middle, + width=middle] + +\setuppagenumbering + [alternative=doublesided, + location=] + +\setupfootertexts + [][pagenumber] + +\setupheadertexts + [][chapter] + +\setupheader + [color=colortwo, + style=bold] + +\setupfooter + [color=colortwo, + style=bold] + +\setuphead + [chapter] + [page={yes,header,right}, + header=empty, + style=\bfc] + +\setupsectionblock + [page={yes,header,right}] + +\starttexdefinition unexpanded section:chapter:number #1 + \doifmode{*sectionnumber} { + \llap{<\enspace}#1\enspace> + } +\stoptexdefinition + +\starttexdefinition unexpanded section:section:number #1 + \doifmode{*sectionnumber} { + \llap{<<\enspace}#1\enspace>> + } +\stoptexdefinition + +\starttexdefinition unexpanded section:subsection:number #1 + \doifmode{*sectionnumber} { + \llap{<<<\enspace}#1\enspace>>> + } +\stoptexdefinition + +\setuphead[chapter] [numbercolor=black,numbercommand=\texdefinition{section:chapter:number}] +\setuphead[section] [numbercolor=black,numbercommand=\texdefinition{section:section:number}] +\setuphead[subsection][numbercolor=black,numbercommand=\texdefinition{section:subsection:number}] + +\setuphead + [section] + [style=\bfa] + +\setuplist + [chapter] + [style=bold] + +\setupinteractionscreen + [option=doublesided] + +\setupalign + [tolerant,stretch] + +\setupwhitespace + [big] + +\setuptolerance + [tolerant] + +\doifelsemode {atpragma} { + \setupbodyfont[lucidaot,10pt] +} { + \setupbodyfont[dejavu,10pt] +} + +\definecolor[colorone] [b=.5] +\definecolor[colortwo] [s=.3] +\definecolor[colorthree][y=.5] + +\setuptype + [color=colorone] + +\setuptyping + [color=colorone] + +\setuphead + [lshowtitle] + [style=\tt, + color=colorone] + +\setuphead + [chapter,section] + [numbercolor=colortwo, + color=colorone] + +\definedescription + [xmlcmd] + [alternative=hanging, + width=line, + distance=1em, + margin=2em, + headstyle=monobold, + headcolor=colorone] + +\setupframedtext + [setuptext] + [framecolor=colorone, + rulethickness=1pt, + corner=round] + +\usemodule[punk] + +\usetypescript[punk] + +\definelayer + [page] + [width=\paperwidth, + height=\paperheight] + +\starttext + +\setuplayout[page] + +\startstandardmakeup + \startfontclass[none] % nil the current fontclass since it may append its features + \EnableRandomPunk + \setlayerframed + [page] + [width=\paperwidth,height=\paperheight, + background=color,backgroundcolor=colorone,backgroundoffset=1ex,frame=off] + {} + \definedfont[demo@punk at 18pt] + \setbox\scratchbox\vbox { + \hsize\dimexpr\paperwidth+2ex\relax + \setupinterlinespace + \baselineskip 1\baselineskip plus 1pt minus 1pt + \raggedcenter + \color[colortwo]{\dorecurse{1000}{XML }} + } + \setlayer + [page] + [preset=middle] + {\vsplit\scratchbox to \dimexpr\paperheight+2ex\relax} + \definedfont[demo@punk at 90pt] + \setstrut + \setlayerframed + [page] + [preset=rightbottom,offset=10mm] + [foregroundcolor=colorthree,align=flushright,offset=overlay,frame=off] + {Dealing\\with XML in\\Con\TeX t MkIV} + \definedfont[demo@punk at 18pt] + \setstrut + \setlayerframed + [page] + [preset=righttop,offset=10mm,x=3mm,rotation=90] + [foregroundcolor=colorthree,align=flushright,offset=overlay,frame=off] + {Hans Hagen, Pragma ADE, \currentdate} + \tightlayer[page] + \stopfontclass +\stopstandardmakeup + +\setuplayout + +\startfrontmatter + +\starttitle[title=Contents] + +\placelist + [chapter,section] + +\stoptitle + +\startchapter[title={Introduction}] + +This manual presents the \MKIV\ way of dealing with \XML. Although the +traditional \MKII\ streaming parser has a charming simplicity in its control, for +complex documents the tree based \MKIV\ method is more convenient. It is for this +reason that the old method has been removed from \MKIV. If you are familiar with +\XML\ processing in \MKII, then you will have noticed that the \MKII\ commands +have \type {XML} in their name. The \MKIV\ commands have a lowercase \type {xml} +in their names. That way there is no danger for confusion or a mixup. + +You may wonder why we do these manipulations in \TEX\ and not use \XSLT\ (or +other transformation methods) instead. The advantage of an integrated approach is +that it simplifies usage. Think of not only processing the document, but also +using \XML\ for managing resources in the same run. An \XSLT\ approach is just as +verbose (after all, you still need to produce \TEX\ code) and probably less +readable. In the case of \MKIV\ the integrated approach is also faster and gives +us the option to manipulate content at runtime using \LUA. It has the additional +advantage that to some extend we can handle a mix of \TEX\ and \XML\ because we +know when we're doing one or the other. + +This manual is dedicated to Taco Hoekwater, one of the first \CONTEXT\ users, and +also the first to use it for processing \XML. Who could have thought at that time +that we would have a more convenient way of dealing with those angle brackets. +The second version for this manual is dedicated to Thomas Schmitz, a power user +who occasionally became victim of the evolving mechanisms. + +\blank + +\startlines +Hans Hagen +\PRAGMA +Hasselt NL +2008\endash2016 +\stoplines + +\stopchapter + +\stopfrontmatter + +\startbodymatter + +\startchapter[title={Setting up a converter}] + +\startsection[title={from structure to setup}] + +We use a very simple document structure for demonstrating how a converter is +defined. In practice a mapping will be more complex, especially when we have a +style with complect chapter openings using data coming from all kind of places, +different styling of sections with the same name, selectively (out of order) +flushed content, special formatting, etc. + +\typefile{manual-demo-1.xml} + +Say that this document is stored in the file \type {demo.xml}, then the following +code can be used as starting point: + +\starttyping +\startxmlsetups xml:demo:base + \xmlsetsetup{#1}{*}{-} + \xmlsetsetup{#1}{document|section|p}{xml:demo:*} +\stopxmlsetups + +\xmlregisterdocumentsetup{demo}{xml:demo:base} + +\startxmlsetups xml:demo:document + \starttitle[title={Contents}] + \placelist[chapter] + \stoptitle + \xmlflush{#1} +\stopxmlsetups + +\startxmlsetups xml:demo:section + \startchapter[title=\xmlfirst{#1}{/title}] + \xmlfirst{#1}{/content} + \stopchapter +\stopxmlsetups + +\startxmlsetups xml:demo:p + \xmlflush{#1}\endgraf +\stopxmlsetups + +\xmlprocessfile{demo}{demo.xml}{} +\stoptyping + +Watch out! These are not just setups, but specific \XML\ setups which get an +argument passed (the \type {#1}). If for some reason your \XML\ processing fails, +it might be that you mistakenly have used a normal setup definition. The argument +\type {#1} represents the current node (element) and is a unique identifier. For +instance a \type {<p>..</p>} can have an identifier {demo::5}. So, we can get +something: + +\starttyping +\xmlflush{demo::5}\endgraf +\stoptyping + +but as well: + +\starttyping +\xmlflush{demo::6}\endgraf +\stoptyping + +Keep in mind that the actual node references are abstractions, you never see +those \type {<id>::<number>}'s, because we will use either the abstract \type +{#1} (any node) or an explicit reference like \type {demo}. The previous setup +when issued will be like: + +\starttyping +\startchapter[title=\xmlfirst{demo::3}{/title}] + \xmlfirst{demo::4}{/content} +\stopchapter +\stoptyping + +Here the \type {title} is used to typeset the chapter title but also for an entry +in the table of contents. At the moment the title is typeset the \XML\ node gets +looked up and expanded in real text. However, for the list it gets stored for +later use. One can argue that this is not needed for \XML, because one can just +filter all the titles and use page references, but then one also looses the +control one normally has over such titles. For instance it can be that some +titles are rendered differently and for that we need to keep track of usage. +Doing that with transformations or filtering is often more complex than leaving +that to \TEX. As soon as the list gets typeset, the reference (\type {demo::#3}) +is used for the lookup. This is because by default the title is stored as given. +So, as long as we make sure the \XML\ source is loaded before the table of +contents is typeset we're ok. Later we will look into this on more detail, for +now it's enough to know that in most cases the abstract \type {#1} reference will +work out ok. + +Contrary to the style definitions this interface looks rather low level (with no +optional arguments) and the main reason for this is that we want processing to be +fast. So, the basic framework is: + +\starttyping +\startxmlsetups xml:demo:base + % associate setups with elements +\stopxmlsetups + +\xmlregisterdocumentsetup{demo}{xml:demo:base} + +% define setups for matches + +\xmlprocessfile{demo}{demo.xml}{} +\stoptyping + +In this example we mostly just flush the content of an element and in the case of +a section we flush explicit child elements. The \type {#1} in the example code +represents the current element. The line: + +\starttyping +\xmlsetsetup{demo}{*}{-} +\stoptyping + +sets the default for each element to \quote {just ignore it}. A \type {+} would +make the default to always flush the content. This means that at this point we +only handle: + +\starttyping +<section> + <title>Some title</title> + <content> + <p>a paragraph of text</p> + </content> +</section> +\stoptyping + +In the next section we will deal with the slightly more complex itemize and +figure placement. At first sight all these setups may look overkill but keep in +mind that normally the number of elements is rather limited. The complexity is +often in the style and having access to each snippet of content is actually +quite handy for that. + +\stopsection + +\startsection[title={alternative solutions}] + +Dealing with an itemize is rather simple (as long as we forget about +attributes that control the behaviour): + +\starttyping +<itemize> + <item>first</item> + <item>second</item> +</itemize> +\stoptyping + +First we need to add \type {itemize} to the setup assignment (unless we've used +the wildcard \type {*}): + +\starttyping +\xmlsetsetup{demo}{document|section|p|itemize}{xml:demo:*} +\stoptyping + +The setup can look like: + +\starttyping +\startxmlsetups xml:demo:itemize + \startitemize + \xmlfilter{#1}{/item/command(xml:demo:itemize:item)} + \stopitemize +\stopxmlsetups + +\startxmlsetups xml:demo:itemize:item + \startitem + \xmlflush{#1} + \stopitem +\stopxmlsetups +\stoptyping + +An alternative is to map item directly: + +\starttyping +\xmlsetsetup{demo}{document|section|p|itemize|item}{xml:demo:*} +\stoptyping + +and use: + +\starttyping +\startxmlsetups xml:demo:itemize + \startitemize + \xmlflush{#1} + \stopitemize +\stopxmlsetups + +\startxmlsetups xml:demo:item + \startitem + \xmlflush{#1} + \stopitem +\stopxmlsetups +\stoptyping + +Sometimes, a more local solution using filters and \type {/command(...)} makes more +sense, especially when the \type {item} tag is used for other purposes as well. + +Explicit flushing with \type {command} is definitely the way to go when you have +complex products. In one of our projects we compose math school books from many +thousands of small \XML\ files, and from one source set several products are +typeset. Within a book sections get done differently, content gets used, ignored +or interpreted differently depending on the kind of content, so there is a +constant checking of attributes that drive the rendering. In that a generic setup +for a title element makes less sense than explicit ones for each case. (We're +talking of huge amounts of files here, including multiple images on each rendered +page.) + +When using \type {command} you can pass two arguments, the first is the setup for +the match, the second one for the miss, as in: + +\starttyping +\xmlfilter{#1}{/element/command(xml:true,xml:false)} +\stoptyping + +Back to the example, this leaves us with dealing with the resources, like +figures: + +\starttyping +<resource type='figure'> + <caption>A picture of a cow.</caption> + <content><external file="cow.pdf"/></content> +</resource> +\stoptyping + +Here we can use a more restricted match: + +\starttyping +\xmlsetsetup{demo}{resource[@type='figure']}{xml:demo:figure} +\xmlsetsetup{demo}{external}{xml:demo:*} +\stoptyping + +and the definitions: + +\starttyping +\startxmlsetups xml:demo:figure + \placefigure + {\xmlfirst{#1}{/caption}} + {\xmlfirst{#1}{/content}} +\stopxmlsetups + +\startxmlsetups xml:demo:external + \externalfigure[\xmlatt{#1}{file}] +\stopxmlsetups +\stoptyping + +At this point it is good to notice that \type {\xmlatt{#1}{file}} is passed as it +is: a macro call. This means that when a macro like \type {\externalfigure} uses +the first argument frequently without first storing its value, the lookup is done +several times. A solution for this is: + +\starttyping +\startxmlsetups xml:demo:external + \expanded{\externalfigure[\xmlatt{#1}{file}]} +\stopxmlsetups +\stoptyping + +Because the lookup is rather fast, normally there is no need to bother about this +too much because internally \CONTEXT\ already makes sure such expansion happens +only once. + +An alternative definition for placement is the following: + +\starttyping +\xmlsetsetup{demo}{resource}{xml:demo:resource} +\stoptyping + +with: + +\starttyping +\startxmlsetups xml:demo:resource + \placefloat + [\xmlatt{#1}{type}] + {\xmlfirst{#1}{/caption}} + {\xmlfirst{#1}{/content}} +\stopxmlsetups +\stoptyping + +This way you can specify \type {table} as type too. Because you can define your +own float types, more complex variants are also possible. In that case it makes +sense to provide some default behaviour too: + +\starttyping +\definefloat[figure-here][figure][default=here] +\definefloat[figure-left][figure][default=left] +\definefloat[table-here] [table] [default=here] +\definefloat[table-left] [table] [default=left] + +\startxmlsetups xml:demo:resource + \placefloat + [\xmlattdef{#1}{type}{figure}-\xmlattdef{#1}{location}{here}] + {\xmlfirst{#1}{/caption}} + {\xmlfirst{#1}{/content}} +\stopxmlsetups +\stoptyping + +In this example we support two types and two locations. We default to a figure +placed (when possible) at the current location. + +\stopsection + +\stopchapter + +\startchapter[title={Filtering content}] + +\startsection[title={\TEX\ versus \LUA}] + +It will not come as a surprise that we can access \XML\ files from \TEX\ as well +as from \LUA. In fact there are two methods to deal with \XML\ in \LUA. First +there are the low level \XML\ functions in the \type {xml} namespace. On top of +those functions there is a set of functions in the \type {lxml} namespace that +deals with \XML\ in a more \TEX ie way. Most of these have similar commands at +the \TEX\ end. + +\startbuffer +\startxmlsetups first:demo:one + \xmlfilter {#1} {artist/name[text()='Randy Newman']/.. + /albums/album[position()=3]/command(first:demo:two)} +\stopxmlsetups + +\startxmlsetups first:demo:two + \blank \start \tt + \xmldisplayverbatim{#1} + \stop \blank +\stopxmlsetups + +\xmlprocessfile{demo}{music-collection.xml}{first:demo:one} +\stopbuffer + +\typebuffer + +This gives the following snippet of verbatim \XML\ code. The indentation is +conform the indentation in the whole \XML\ file. \footnote {The (probably +outdated) \XML\ file contains the collection stores on my slimserver instance. +You can use the \type {mtxrun --script flac} to generate such files.} + +\doifmodeelse {atpragma} { + \getbuffer +} { + \typefile{xml-mkiv-01.xml} +} + +An alternative written in \LUA\ looks as follows: + +\startbuffer +\blank \start \tt \startluacode + local m = lxml.load("mine","music-collection.xml") -- m == lxml.id("mine") + local p = "artist/name[text()='Randy Newman']/../albums/album[position()=4]" + local l = lxml.filter(m,p) -- returns a list (with one entry) + lxml.displayverbatim(l[1]) +\stopluacode \stop \blank +\stopbuffer + +\typebuffer + +This produces: + +\doifmodeelse {atpragma} { + \getbuffer +} { + \typefile{xml-mkiv-02.xml} +} + +You can use both methods mixed but in practice we will use the \TEX\ commands in +regular styles and the mixture in modules, for instance in those dealing with +\MATHML\ and cals tables. For complex matters you can write your own finalizers +(the last action to be taken in a match) in \LUA\ and use them at the \TEX\ end. + +\stopsection + +\startsection[title={a few details}] + +In \CONTEXT\ setups are a rather common variant on macros (\TEX\ commands) but +with their own namespace. An example of a setup is: + +\starttyping +\startsetup doc:print + \setuppapersize[A4][A4] +\stopsetup + +\startsetup doc:screen + \setuppapersize[S6][S4] +\stopsetup +\stoptyping + +Given the previous definitions, later on we can say something like: + +\starttyping +\doifmodeelse {paper} { + \setup[doc:print] +} { + \setup[doc:screen] +} +\stoptyping + +Another example is: + +\starttyping +\startsetup[doc:header] + \marking[chapter] + \space + -- + \space + \pagenumber +\stopsetup +\stoptyping + +in combination with: + +\starttyping +\setupheadertexts[\setup{doc:header}] +\stoptyping + +Here the advantage is that instead of ending up with an unreadable header +definitions, we use a nicely formatted setup. An important property of setups and +the reason why they were introduced long ago is that spaces and newlines are +ignored in the definition. This means that we don't have to worry about so called +spurious spaces but it also means that when we do want a space, we have to use +the \type {\space} command. + +The only difference between setups and \XML\ setups is that the later ones get an +argument (\type {#1}) that reflects the current node in the \XML\ tree. + +\stopsection + +\startsection[title={CDATA}] + +What to do with \type {CDATA}? There are a few methods at tle \LUA\ end for +dealing with it but here we just mention how you can influence the rendering. +There are four macros that play a role here: + +\starttyping +\unexpanded\def\xmlcdataobeyedline {\obeyedline} +\unexpanded\def\xmlcdataobeyedspace{\strut\obeyedspace} +\unexpanded\def\xmlcdatabefore {\begingroup\tt} +\unexpanded\def\xmlcdataafter {\endgroup} +\stoptyping + +Technically you can overload them but beware of side effects. Normally you won't +see much \type {CDATA} and whenever we do, it involves special data that needs +very special treatment anyway. + +\stopsection + +\startsection[title={Entities}] + +As usual with any way of encoding documents you need escapes in order to encode +the characters that are used in tagging the content, embedding comments, escaping +special characters in strings (in programming languages), etc. In \XML\ this +means that in order characters like \type {<} you need an escape like \type +{<} and in order then to encode an \type {&} you need \type {&}. + +In a typesetting workflow using a programming language like \TEX, another problem +shows up. There we have different special characters, like \type {$ $} for triggering +math, but also the backslash, braces etc. Even one such special character is already +enough to have yet another escaping mechanism at work. + +Ideally a user should not worry about these issues but it helps figuring out issues +when you know what happens under the hood. Also it is good to know that in the +code there are several ways to deal with these issues. Take the following document: + +\starttyping +<text> + Here we have a bit of a <&mess>: + + # # + % % + \ \ + { { + | | + } } + ~ ~ +</text> +\stoptyping + +When the file is read the \type {<} entity will be replaced by \type {<} and +the \type {>} by \type {>}. The numeric entities will be replaced by the +characters they refer to. The \type {&mess} is kind of special. We do preload +a huge list of more of less standardized entities but \type {mess} is not in +there. However, it is possible to have it defined in the document preamble, like: + +\starttyping +<!DOCTYPE dummy SYSTEM "dummy.dtd" [ + <!ENTITY mess "what a mess" > +]> +\stoptyping + +or even this: + +\starttyping +<!DOCTYPE dummy SYSTEM "dummy.dtd" [ + <!ENTITY mess "<p>what a mess</p>" > +]> +\stoptyping + +You can also define it in your document style using one of: + +\startxmlcmd {\cmdbasicsetup{xmlsetentity}} + replaces entity with name \cmdinternal {cd:name} by \cmdinternal {cd:text} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmltexentity}} + replaces entity with name \cmdinternal {cd:name} by \cmdinternal {cd:text} + typeset under a \TEX\ regime +\stopxmlcmd + +Such a definition will always have a higher priority than the one defined +in the document. Anyway, when the document is read in all entities are +resolved and those that need a special treatment because they map to some +text are stored in such a way that we can roundtrip them. As a consequence, +as soon as the content gets pushed into \TEX, we need not only to intercept +special characters but also have to make sure that the following works: + +\starttyping +\xmltexentity {tex} {\TEX} +\stoptyping + +Here the backslash starts a control sequence while in regular content a +backslash is just that: a backslash. + +Special characters are really special when we have to move text around +in a \TEX\ ecosystem. + +\starttyping +<text> + <title>About #3</title> +</text> +\stoptyping + +If we map and define title as follows: + +\starttyping +\startxmlsetup xml:title + \title{\xmlflush{#1}} +\stopxmlsetup +\stoptyping + +normally something \type {\xmlflush {id::123}} will be written to the +auxiliary file and in most cases that is quite okay, but if we have this: + +\starttyping +\setuphead[title][expansion=yes] +\stoptyping + +then we don't want the \type {#} to end up as hash because later on \TEX\ +can get very confused about it because it sees some argument then in a +probably unexpected way. This is solved by escaping the hash like this: + +\starttyping +About \Ux{23}3 +\stoptyping + +The \type {\Ux} command will convert its hexadecimal argument into a +character. Of course one then needs to typeset such a text under a \TEX\ +character regime but that is normally the case anyway. + +\stopsection + +\stopchapter + +\startchapter[title={Commands}] + +\startsection[title={nodes and lpaths}] + +The amount of commands available for manipulating the \XML\ file is rather large. +Many of the commands cooperate with the already discussed setups, a fancy name +for a collection of macro calls either or not mixed with text. + +Most of the commands are just shortcuts to \LUA\ calls, which means that the real +work is done by \LUA. In fact, what happens is that we have a continuous transfer +of control from \TEX\ to \LUA, where \LUA\ prints back either data (like element +content or attribute values) or just invokes a setup whereby it passes a +reference to the node resolved conform the path expression. The invoked setup +itself might return control to \LUA\ again, etc. + +This sounds complicated but examples will show what we mean here. First we +present the whole repertoire of commands. Because users can read the source code, +they might uncover more commands, but only the ones discussed here are official. +The commands are grouped in categories. + +In the following sections \cmdinternal {cd:node} means a reference to a node: +this can be the identifier of the root (the loaded xml tree) or a reference to a +node in that tree (often the result of some lookup. A \cmdinternal {cd:lpath} is +a fancy name for a path expression (as with \XSLT) but resolved by \LUA. + +\stopsection + +\startsection[title={commands}] + +There are a lot of commands available but you probably can ignore most of them. +We try to be complete which means that there is for instance \type {\xmlfirst} as +well as \type {\xmllast} but you probably never need the last one. There are also +commands that were used when testing this interface and we see no reason to +remove them. Some obscure ones are used in modules and after a while even I often +forget that they exist. To give you an idea of what commands are important we +show their use in generating the \CONTEXT\ command definitions (\type +{x-set-11.mkiv}) per Januari 2016: + +\startcolumns[n=2,balance=yes] +\starttabulate[|l|r|] +\NC \type {\xmlall} \NC 1 \NC \NR +\NC \type {\xmlatt} \NC 23 \NC \NR +\NC \type {\xmlattribute} \NC 1 \NC \NR +\NC \type {\xmlcount} \NC 1 \NC \NR +\NC \type {\xmldoif} \NC 2 \NC \NR +\NC \type {\xmldoifelse} \NC 1 \NC \NR +\NC \type {\xmlfilterlist} \NC 4 \NC \NR +\NC \type {\xmlflush} \NC 5 \NC \NR +\NC \type {\xmlinclude} \NC 1 \NC \NR +\NC \type {\xmlloadonly} \NC 1 \NC \NR +\NC \type {\xmlregisterdocumentsetup} \NC 1 \NC \NR +\NC \type {\xmlsetsetup} \NC 1 \NC \NR +\NC \type {\xmlsetup} \NC 4 \NC \NR +\stoptabulate +\stopcolumns + +As you can see filtering, flushing and accessing attributes score high. Below we show +the statistics of a quite complex rendering (5 variants of schoolbooks: basic book, +answers, teachers guide, worksheets, full blown version with extensive tracing). + +\startcolumns[n=2,balance=yes] +\starttabulate[|l|r|] +\NC \type {\xmladdindex} \NC 3 \NC \NR +\NC \type {\xmlall} \NC 5 \NC \NR +\NC \type {\xmlappendsetup} \NC 1 \NC \NR +\NC \type {\xmlapplyselectors} \NC 1 \NC \NR +\NC \type {\xmlatt} \NC 40 \NC \NR +\NC \type {\xmlattdef} \NC 9 \NC \NR +\NC \type {\xmlattribute} \NC 10 \NC \NR +\NC \type {\xmlbadinclusions} \NC 3 \NC \NR +\NC \type {\xmlconcat} \NC 3 \NC \NR +\NC \type {\xmlcount} \NC 1 \NC \NR +\NC \type {\xmldelete} \NC 11 \NC \NR +\NC \type {\xmldoif} \NC 39 \NC \NR +\NC \type {\xmldoifelse} \NC 28 \NC \NR +\NC \type {\xmldoifelsetext} \NC 13 \NC \NR +\NC \type {\xmldoifnot} \NC 2 \NC \NR +\NC \type {\xmldoifnotselfempty} \NC 1 \NC \NR +\NC \type {\xmlfilter} \NC 100 \NC \NR +\NC \type {\xmlfirst} \NC 51 \NC \NR +\NC \type {\xmlflush} \NC 69 \NC \NR +\NC \type {\xmlflushcontext} \NC 2 \NC \NR +\NC \type {\xmlinclude} \NC 1 \NC \NR +\NC \type {\xmlincludeoptions} \NC 5 \NC \NR +\NC \type {\xmlinclusion} \NC 16 \NC \NR +\NC \type {\xmlinjector} \NC 1 \NC \NR +\NC \type {\xmlloaddirectives} \NC 1 \NC \NR +\NC \type {\xmlmapvalue} \NC 4 \NC \NR +\NC \type {\xmlmatch} \NC 1 \NC \NR +\NC \type {\xmlprependsetup} \NC 5 \NC \NR +\NC \type {\xmlregisterdocumentsetup} \NC 2 \NC \NR +\NC \type {\xmlregistersetup} \NC 1 \NC \NR +\NC \type {\xmlremapnamespace} \NC 1 \NC \NR +\NC \type {\xmlsetfunction} \NC 2 \NC \NR +\NC \type {\xmlsetinjectors} \NC 2 \NC \NR +\NC \type {\xmlsetsetup} \NC 11 \NC \NR +\NC \type {\xmlsetup} \NC 76 \NC \NR +\NC \type {\xmlstrip} \NC 1 \NC \NR +\NC \type {\xmlstripanywhere} \NC 1 \NC \NR +\NC \type {\xmltag} \NC 1 \NC \NR +\NC \type {\xmltext} \NC 53 \NC \NR +\NC \type {\xmlvalue} \NC 2 \NC \NR +\stoptabulate +\stopcolumns + +Here many more are used but this is an exceptional case. The top is again +dominated by filtering, flushing and attribute consulting. The list can actually +be smaller. For instance, the \type {\xmlcount} can just as well be \type +{\xmlfilter} with a \type {count} finalizer. There are also some special ones, +like the injectors, that are needed for finetuning the final result. + +\stopsection + +\startsection[title={loading}] + +\startxmlcmd {\cmdbasicsetup{xmlloadfile}} + loads the file \cmdinternal {cd:file} and registers it under \cmdinternal + {cd:name} and applies either given or standard \cmdinternal + {cd:xmlsetup} (alias: \type {\xmlload}) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlloadbuffer}} + loads the buffer \cmdinternal {cd:buffer} and registers it under + \cmdinternal {cd:name} and applies either given or standard + \cmdinternal {cd:xmlsetup} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlloaddata}} + loads \cmdinternal {cd:text} and registers it under \cmdinternal + {cd:name} and applies either given or standard \cmdinternal + {cd:xmlsetup} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlloadonly}} + loads \cmdinternal {cd:text} and registers it under \cmdinternal + {cd:name} and applies either given or standard \cmdinternal + {cd:xmlsetup} but doesn't flush the content +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlinclude}} + includes the file specified by attribute \cmdinternal {cd:name} of the + element located by \cmdinternal {cd:lpath} at node \cmdinternal {cd:node} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlprocessfile}} + registers file \cmdinternal {cd:file} as \cmdinternal {cd:name} and + process the tree starting with \cmdinternal {cd:xmlsetup} (alias: + \type {\xmlprocess}) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlprocessbuffer}} + registers buffer \cmdinternal {cd:name} as \cmdinternal {cd:name} and process + the tree starting with \cmdinternal {cd:xmlsetup} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlprocessdata}} + registers \cmdinternal {cd:text} as \cmdinternal {cd:name} and process + the tree starting with \cmdinternal {cd:xmlsetup} +\stopxmlcmd + +The initial setup defaults to \type {xml:process} that is defined +as follows: + +\starttyping +\startsetups xml:process + \xmlregistereddocumentsetups\xmldocument + \xmlmain\xmldocument +\stopsetups +\stoptyping + +First we apply the setups associated with the document (including common setups) +and then we flush the whole document. The macro \type {\xmldocument} expands to +the current document id. There is also \type {\xmlself} which expands to the +current node number (\type {#1} in setups). + +\startxmlcmd {\cmdbasicsetup{xmlmain}} + returns the whole documents +\stopxmlcmd + +Normally such a flush will trigger a chain reaction of setups associated with the +child elements. + +\stopsection + +\startsection[title={saving}] + +\startxmlcmd {\cmdbasicsetup{xmlsave}} + saves the given node \cmdinternal {cd:node} in the file \cmdinternal {cd:file} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmltofile}} + saves the match of \cmdinternal {cd:lpath} in the file \cmdinternal {cd:file} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmltobuffer}} + saves the match of \cmdinternal {cd:lpath} in the buffer \cmdinternal {cd:buffer} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmltobufferverbose}} + saves the match of \cmdinternal {cd:lpath} verbatim in the buffer \cmdinternal + {cd:buffer} +\stopxmlcmd + +% \startxmlcmd {\cmdbasicsetup{xmltoparameters}} +% converts the match of \cmdinternal {cd:lpath} to key|/|values (for tracing) +% \stopxmlcmd + +The next command is only needed when you have messed with the tree using +\LUA\ code. + +\startxmlcmd {\cmdbasicsetup{xmladdindex}} + (re)indexes a tree +\stopxmlcmd + +The following macros are only used in special situations and are not really meant +for users. + +\startxmlcmd {\cmdbasicsetup{xmlraw}} + flush the content if \cmdinternal {cd:node} with original entities +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{startxmlraw}} + flush the wrapped content with original entities +\stopxmlcmd + +\stopsection + +\startsection[title={flushing data}] + +When we flush an element, the associated \XML\ setups are expanded. The most +straightforward way to flush an element is the following. Keep in mind that the +returned valus itself can trigger setups and therefore flushes. + +\startxmlcmd {\cmdbasicsetup{xmlflush}} + returns all nodes under \cmdinternal {cd:node} +\stopxmlcmd + +You can restrict flushing by using commands that accept a specification. + +\startxmlcmd {\cmdbasicsetup{xmltext}} + returns the text of the matching \cmdinternal {cd:lpath} under \cmdinternal + {cd:node} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlnonspace}} + returns the text of the matching \cmdinternal {cd:lpath} under \cmdinternal + {cd:node} without embedded spaces +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlall}} + returns all nodes under \cmdinternal {cd:node} that matches \cmdinternal + {cd:lpath} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmllastmatch}} + returns all nodes found in the last match +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlfirst}} + returns the first node under \cmdinternal {cd:node} that matches \cmdinternal + {cd:lpath} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmllast}} + returns the last node under \cmdinternal {cd:node} that matches \cmdinternal + {cd:lpath} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlfilter}} + at a match of \cmdinternal {cd:lpath} a given filter \type {filter} is applied + and the result is returned +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlsnippet}} + returns the \cmdinternal {cd:number}\high{th} element under \cmdinternal + {cd:node} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlposition}} + returns the \cmdinternal {cd:number}\high{th} match of \cmdinternal + {cd:lpath} at node \cmdinternal {cd:node}; a negative number starts at the + end (alias: \type {\xmlindex}) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlelement}} + returns the \cmdinternal {cd:number}\high{th} child of node \cmdinternal {cd:node}; + a negative number starts at the end +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlpos}} + returns the index (position) in the parent node of \cmdinternal {cd:node} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlconcat}} + returns the sequence of nodes that match \cmdinternal {cd:lpath} at + \cmdinternal {cd:node} whereby \cmdinternal {cd:text} is put between each + match +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlconcatrange}} + returns the \cmdinternal {cd:first}\high {th} upto \cmdinternal + {cd:last}\high {th} of nodes that match \cmdinternal {cd:lpath} at + \cmdinternal {cd:node} whereby \cmdinternal {cd:text} is put between each + match +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlcommand}} + apply the given \cmdinternal {cd:xmlsetup} to each match of \cmdinternal + {cd:lpath} at node \cmdinternal {cd:node} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlstrip}} + remove leading and trailing spaces from nodes under \cmdinternal {cd:node} + that match \cmdinternal {cd:lpath} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlstripped}} + remove leading and trailing spaces from nodes under \cmdinternal {cd:node} + that match \cmdinternal {cd:lpath} and return the content afterwards +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlstripnolines}} + remove leading and trailing spaces as well as collapse embedded spaces + from nodes under \cmdinternal {cd:node} that match \cmdinternal {cd:lpath} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlstrippednolines}} + remove leading and trailing spaces as well as collapse embedded spaces from + nodes under \cmdinternal {cd:node} that match \cmdinternal {cd:lpath} and + return the content afterwards +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlverbatim}} + flushes the content verbatim code (without any wrapping, i.e. no fonts + are selected and such) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlinlineverbatim}} + return the content of the node as inline verbatim code, that is no further + interpretation (expansion) takes place and spaces are honoured; it uses the + following wrapper +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{startxmlinlineverbatim}} + wraps inline verbatim mode using the environment specified (a prefix \type + {xml:} is added to the environment name) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmldisplayverbatim}} + return the content the node as display verbatim code, that is no further + interpretation (expansion) takes place and leading and trailing spaces and + newlines are treated special; it uses the following wrapper +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{startxmldisplayverbatim}} + wraps the content in display verbatim using the environment specified (a prefix + \type {xml:} is added to the environment name) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlprettyprint}} + pretty print (with colors) the node \cmdinternal {cd:node}; use the \CONTEXT\ + \SCITE\ lexers when available (\type {\usemodule [scite]}) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlflushspacewise}} + flush node \cmdinternal {cd:node} obeying spaces and newlines +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlflushlinewise}} + flush node \cmdinternal {cd:node} obeying newlines +\stopxmlcmd + +\stopsection + +\startsection[title={information}] + +The following commands return strings. Normally these are used in tests. + +\startxmlcmd {\cmdbasicsetup{xmlname}} + returns the complete name (including namespace prefix) of the + given \cmdinternal {cd:node} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlnamespace}} + returns the namespace of the given \cmdinternal {cd:node} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmltag}} + returns the tag of the element, without namespace prefix +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlcount}} + returns the number of matches of \cmdinternal {cd:lpath} at node \cmdinternal + {cd:node} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlatt}} + returns the value of attribute \cmdinternal {cd:name} or empty if no such + attribute exists +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlattdef}} + returns the value of attribute \cmdinternal {cd:name} or \cmdinternal + {cd:string} if no such attribute exists +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlrefatt}} + returns the value of attribute \cmdinternal {cd:name} or empty if no such + attribute exists; a leading \type {#} is removed (nicer for tex) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlchainatt}} + returns the value of attribute \cmdinternal {cd:name} or empty if no such + attribute exists; backtracks till a match is found +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlchainattdef}} + returns the value of attribute \cmdinternal {cd:name} or \cmdinternal + {cd:string} if no such attribute exists; backtracks till a match is found +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlattribute}} + finds a first match for \cmdinternal {cd:lpath} at \cmdinternal {cd:node} and + returns the value of attribute \cmdinternal {cd:name} or empty if no such + attribute exists +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlattributedef}} + finds a first match for \cmdinternal {cd:lpath} at \cmdinternal {cd:node} and + returns the value of attribute \cmdinternal {cd:name} or \cmdinternal + {cd:text} if no such attribute exists +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmllastatt}} + returns the last attribute found (this avoids a lookup) +\stopxmlcmd + +\stopsection + +\startsection[title={manipulation}] + +You can use \LUA\ code to manipulate the tree and it makes no sense to duplicate +this in \TEX. In the future we might provide an interface to some of this +functionality. Keep in mind that manipuating the tree might have side effects as +we maintain several indices into the tree that also needs to be updated then. + +\stopsection + +\startsection[title={integration}] + +If you write a module that deals with \XML, for instance processing cals tables, +then you need ways to control specific behaviour. For instance, you might want to +add a background to the table. Such directives are collected in \XML\ files and +can be loaded on demand. + +\startxmlcmd {\cmdbasicsetup{xmlloaddirectives}} + loads \CONTEXT\ directives from \cmdinternal {cd:file} that will get + interpreted when processing documents +\stopxmlcmd + +A directives definition file looks as follows: + +\starttyping +<?xml version="1.0" standalone="yes"?> + +<directives> + <directive attribute='id' value="100" + setup="cdx:100"/> + <directive attribute='id' value="101" + setup="cdx:101"/> + <directive attribute='cdx' value="colors" element="cals:table" + setup="cdx:cals:table:colors"/> + <directive attribute='cdx' value="vertical" element="cals:table" + setup="cdx:cals:table:vertical"/> + <directive attribute='cdx' value="noframe" element="cals:table" + setup="cdx:cals:table:noframe"/> + <directive attribute='cdx' value="*" element="cals:table" + setup="cdx:cals:table:*"/> +</directives> +\stoptyping + +Examples of usage can be found in \type {x-cals.mkiv}. The directive is triggered +by an attribute. Instead of a setup you can specify a setup to be applied before +and after the node gets flushed. + +\startxmlcmd {\cmdbasicsetup{xmldirectives}} + apply the setups directive associated with the node +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmldirectivesbefore}} + apply the before directives associated with the node +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmldirectivesafter}} + apply the after directives associated with the node +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlinstalldirective}} + defines a directive that hooks into a handler +\stopxmlcmd + +Normally a directive will be put in the \XML\ file, for instance as: + +\starttyping +<?context-mathml-directive minus reduction yes ?> +\stoptyping + +Here the \type {mathml} is the general class of directives and \type {minus} a +subclass, in our case a specific element. + +\stopsection + +\startsection[title={setups}] + +The basic building blocks of \XML\ processing are setups. These are just +collections of macros that are expanded. These setups get one argument passed +(\type {#1}): + +\starttyping +\startxmlsetups somedoc:somesetup + \xmlflush{#1} +\stopxmlsetups +\stoptyping + +This argument is normally a number that internally refers to a specific node in +the \XML\ tree. The user should see it as an abstract reference and not depend on +its numeric property. Just think of it as \quote {the current node}. You can (and +probably will) call such setups using: + +\startxmlcmd {\cmdbasicsetup{xmlsetup}} + expands setup \cmdinternal {cd:setup} and pass \cmdinternal {cd:node} as + argument +\stopxmlcmd + +However, in most cases the setups are associated to specific elements, +something that users of \XSLT\ might recognize as templates. + +\startxmlcmd {\cmdbasicsetup{xmlsetfunction}} + associates function \cmdinternal {cd:luafunction} to the elements in + namespace \cmdinternal {cd:name} that match \cmdinternal {cd:lpath} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlsetsetup}} + associates setups (\TEX\ code) \cmdinternal {cd:setup} to the elements to + \cmdinternal {cd:node} that match \cmdinternal {cd:lpath} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlprependsetup}} + pushes \cmdinternal {cd:setup} to the front of global list of setups +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlappendsetup}} + adds \cmdinternal {cd:setup} to the global list of setups to be applied + (alias: \type{\xmlregistersetup}) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlbeforesetup}} + pushes \cmdinternal {cd:setup} into the global list of setups; the + last setup is the position +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlaftersetup}} + adds \cmdinternal {cd:setup} to the global list of setups; the last setup + is the position +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlremovesetup}} + removes \cmdinternal {cd:setup} from the global list of setups +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlprependdocumentsetup}} + pushes \cmdinternal {cd:setup} to the front of list of setups to be applied + to \cmdinternal {cd:name} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlappenddocumentsetup}} + adds \cmdinternal {cd:setup} to the list of setups to be applied to + \cmdinternal {cd:name} (alias: \type{\xmlregisterdocumentsetup}) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlbeforedocumentsetup}} + pushes \cmdinternal {cd:setup} into the setups to be applied to \cmdinternal + {cd:name}; the last setup is the position +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlafterdocumentsetup}} + adds \cmdinternal {cd:setup} to the setups to be applied to \cmdinternal + {cd:name}; the last setup is the position +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlremovedocumentsetup}} + removes \cmdinternal {cd:setup} from the global list of setups to be applied + to \cmdinternal {cd:name} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlresetsetups}} + removes all global setups +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlresetdocumentsetups}} + removes all setups from the \cmdinternal {cd:name} specific list of setups to + be applied +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlflushdocumentsetups}{setup}} + applies \cmdinternal {cd:setup} (can be a list) to \cmdinternal {cd:name} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlregisteredsetups}} + applies all global setups to the current document +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlregistereddocumentsetups}} + applies all document specific \cmdinternal {cd:setup} to document + \cmdinternal {cd:name} +\stopxmlcmd + +\stopsection + +\startsection[title={testing}] + +The following test macros all take a \cmdinternal {cd:node} as first argument +and an \cmdinternal {cd:lpath} as second: + +\startxmlcmd {\cmdbasicsetup{xmldoif}} + expands to \cmdinternal {cd:true} when \cmdinternal {cd:lpath} matches at + node \cmdinternal {cd:node} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmldoifnot}} + expands to \cmdinternal {cd:true} when \cmdinternal {cd:lpath} does not match + at node \cmdinternal {cd:node} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmldoifelse}{yes}} + expands to \cmdinternal {cd:true} when \cmdinternal {cd:lpath} matches at + node \cmdinternal {cd:node} and to \cmdinternal {cd:false} otherwise +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmldoiftext}} + expands to \cmdinternal {cd:true} when the node matching \cmdinternal + {cd:lpath} at node \cmdinternal {cd:node} has some content +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmldoifnottext}} + expands to \cmdinternal {cd:true} when the node matching \cmdinternal + {cd:lpath} at node \cmdinternal {cd:node} has no content +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmldoifelsetext}} + expands to \cmdinternal {cd:true} when the node matching \cmdinternal + {cd:lpath} at node \cmdinternal {cd:node} has content and to \cmdinternal + {cd:false} otherwise +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmldoifelseempty}} + expands to \cmdinternal {cd:true} when the node matching \cmdinternal + {cd:lpath} at node \cmdinternal {cd:node} is empty and to \cmdinternal + {cd:false} otherwise +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmldoifelseselfempty}} + expands to \cmdinternal {cd:true} when the node is empty and to \cmdinternal + {cd:false} otherwise +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmldoifselfempty}} + expands to \cmdinternal {cd:true} when \cmdinternal {cd:node} is empty +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmldoifnotselfempty}} + expands to \cmdinternal {cd:true} when \cmdinternal {cd:node} is not empty +\stopxmlcmd + +\stopsection + +\startsection[title={initialization}] + +The general setup command (not to be confused with setups) that deals with the +\MKIV\ tree handler is \type {\setupxml}. There are currently only a few options. + +\cmdfullsetup{setupxml} + +When you set \type {default} to \cmdinternal {cd:text} elements with no setup +assigned will end up as text. When set to \type {hidden} such elements will be +hidden. You can apply the default yourself using: + +\startxmlcmd {\cmdbasicsetup{xmldefaulttotext}} + presets the tree with root \cmdinternal {cd:node} to the handlers set up with + \type {\setupxml} option \cmdinternal{default} +\stopxmlcmd + +You can set \type {compress} to \type {yes} in which case comment is stripped +from the tree when the file is read. When \type {entities} is set to \type {yes} +(this is the default) entities are replaced. + +\startxmlcmd {\cmdbasicsetup{xmlregisterns}} + associates an internal namespace (like \type {mml}) with one given in the + document as \URL\ (like mathml) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlremapname}} + changes the namespace and tag of the matching elements +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlremapnamespace}} + replaces all references to the given namespace to a new one (applied + recursively) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlchecknamespace}} + sets the namespace of the matching elements unless a namespace is already set +\stopxmlcmd + +\stopsection + +\startsection[title={helpers}] + +Often an attribute will determine the rendering and this may result in many +tests. Especially when we have multiple attributes that control the output such +tests can become rather extensive and redundant because one gets $n\times m$ or +more such tests. + +Therefore we have a convenient way to map attributes onto for instance strings or +commands. + +\startxmlcmd {\cmdbasicsetup{xmlmapvalue}} + associate a \cmdinternal {cd:text} with a \cmdinternal {cd:category} and + \cmdinternal {cd:name} (alias: \type{\xmlmapval}) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlvalue}} + expand the value associated with a \cmdinternal {cd:category} and + \cmdinternal {cd:name} and if not resolved, expand to the \cmdinternal + {cd:text} (alias: \type{\xmlval}) +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmldoifelsevalue}} + associate a \cmdinternal {cd:text} with a \cmdinternal {cd:category} and + \cmdinternal {cd:name} +\stopxmlcmd + +This is used as follows. We define a couple of mappings in the same category: + +\starttyping +\xmlmapvalue{emph}{bold} {\bf} +\xmlmapvalue{emph}{italic}{\it} +\stoptyping + +Assuming that we have associated the following setup with the \type {emph} +element, we can say (with \type {#1} being the current element): + +\starttyping +\startxmlsetups demo:emph + \begingroup + \xmlvalue{emph}{\xmlatt{#1}{type}}{} + \endgroup +\stopxmlsetups +\stoptyping + +In this case we have no default. The \type {type} attribute triggers the actions, +as in: + +\starttyping +normal <emph type='bold'>bold</emph> normal +\stoptyping + +This mechanism is not really bound to elements and attributes so you can use this +mechanism for other purposes as well. + +\stopsection + +\stopchapter + +\startchapter[title={Expressions and filters}] + +\startsection[title={path expressions}] + +In the previous chapters we used \cmdinternal {cd:lpath} expressions, which are a variant +on \type {xpath} expressions as in \XSLT\ but in this case more geared towards +usage in \TEX. This mechanisms will be extended when demands are there. + +A path is a sequence of matches. A simple path expression is: + +\starttyping +a/b/c/d +\stoptyping + +Here each \type {/} goes one level deeper. We can go backwards in a lookup with +\type {..}: + +\starttyping +a/b/../d +\stoptyping + +We can also combine lookups, as in: + +\starttyping +a/(b|c)/d +\stoptyping + +A negated lookup is preceded by a \type {!}: + +\starttyping +a/(b|c)/!d +\stoptyping + +A wildcard is specified with a \type {*}: + +\starttyping +a/(b|c)/!d/e/*/f +\stoptyping + +In addition to these tag based lookups we can use attributes: + +\starttyping +a/(b|c)/!d/e/*/f[@type=whatever] +\stoptyping + +An \type {@} as first character means that we are dealing with an attribute. +Within the square brackets there can be boolean expressions: + +\starttyping +a/(b|c)/!d/e/*/f[@type=whatever and @id>100] +\stoptyping + +You can use functions as in: + +\starttyping +a/(b|c)/!d/e/*/f[something(text()) == "oeps"] +\stoptyping + +There are a couple of predefined functions: + +\starttabulate[|l|l|p|] +\NC \type{rootposition} \type{order} \NC number \NC the index of the matched root element (kind of special) \NC \NR +\NC \type{position} \NC number \NC the current index of the matched element in the match list \NC \NR +\NC \type{match} \NC number \NC the current index of the matched element sub list with the same parent \NC \NR +\NC \type{first} \NC number \NC \NC \NR +\NC \type{last} \NC number \NC \NC \NR +\NC \type{index} \NC number \NC the current index of the matched element in its parent list \NC \NR +\NC \type{firstindex} \NC number \NC \NC \NR +\NC \type{lastindex} \NC number \NC \NC \NR +\NC \type{element} \NC number \NC the element's index \NC \NR +\NC \type{firstelement} \NC number \NC \NC \NR +\NC \type{lastelement} \NC number \NC \NC \NR +\NC \type{text} \NC string \NC the textual representation of the matched element \NC \NR +\NC \type{content} \NC table \NC the node of the matched element \NC \NR +\NC \type{name} \NC string \NC the full name of the matched element: namespace and tag \NC \NR +\NC \type{namespace} \type{ns} \NC string \NC the namespace of the matched element \NC \NR +\NC \type{tag} \NC string \NC the tag of the matched element \NC \NR +\NC \type{attribute} \NC string \NC the value of the attribute with the given name of the matched element \NC \NR +\stoptabulate + +There are fundamental differences between \type {position}, \type {match} and +\type {index}. Each step results in a new list of matches. The \type {position} +is the index in this new (possibly intermediate) list. The \type {match} is also +an index in this list but related to the specific match of element names. The +\type {index} refers to the location in the parent element. + +Say that we have: + +\starttyping +<collection> + <resources> + <manual> + <screen>.1.</screen> + <paper>.1.</paper> + </manual> + <manual> + <paper>.2.</paper> + <screen>.2.</screen> + </manual> + <resources> + <resources> + <manual> + <screen>.3.</screen> + <paper>.3.</paper> + </manual> + <resources> +<collection> +\stoptyping + +The following then applies: + +\starttabulate[|l|l|] +\NC \type {collection/resources/manual[position()==1]/paper} \NC \type{.1.} \NC \NR +\NC \type {collection/resources/manual[match()==1]/paper} \NC \type{.1.} \type{.3.} \NC \NR +\NC \type {collection/resources/manual/paper[index()==1]} \NC \type{.2.} \NC \NR +\stoptabulate + +In most cases the \type {position} test is more restrictive than the \type +{match} test. + +You can pass your own functions too. Such functions are defined in the the \type +{xml.expressions} namespace. We have defined a few shortcuts: + +\starttabulate[|l|l|] +\type {find(str,pattern)} \NC \type{string.find} \NC \NR +\type {contains(str)} \NC \type{string.find} \NC \NR +\type {oneof(str,...)} \NC is \type{str} in list \NC \NR +\type {upper(str)} \NC \type{characters.upper} \NC \NR +\type {lower(str)} \NC \type{characters.lower} \NC \NR +\type {number(str)} \NC \type{tonumber} \NC \NR +\type {boolean(str)} \NC \type{toboolean} \NC \NR +\type {idstring(str)} \NC removes leading hash \NC \NR +\type {name(index)} \NC full tag name \NC \NR +\type {tag(index)} \NC tag name \NC \NR +\type {namespace(index)} \NC namespace of tag \NC \NR +\type {text(index)} \NC content \NC \NR +\type {error(str)} \NC quit and show error \NC \NR +\type {quit()} \NC quit \NC \NR +\type {print()} \NC print message \NC \NR +\type {count(pattern)} \NC number of matches \NC \NR +\type {child(pattern)} \NC take child that matches \NC \NR +\stoptabulate + + +You can also use normal \LUA\ functions as long as you make sure that you pass +the right arguments. There are a few predefined variables available inside such +functions. + +\starttabulate[|Tl|l|p|] +\NC \type{list} \NC table \NC the list of matches \NC \NR +\NC \type{l} \NC number \NC the current index in the list of matches \NC \NR +\NC \type{ll} \NC element \NC the current element that matched \NC \NR +\NC \type{order} \NC number \NC the position of the root of the path \NC \NR +\stoptabulate + +The given expression between \type {[]} is converted to a \LUA\ expression so you +can use the usual ingredients: + +\starttyping +== ~= <= >= < > not and or () +\stoptyping + +In addition, \type {=} equals \type {==} and \type {!=} is the same as \type +{~=}. If you mess up the expression, you quite likely get a \LUA\ error message. + +\stopsection + +\startsection[title={functions as filters}] + +At the \LUA\ end a whole \cmdinternal {cd:lpath} expression results in a (set of) node(s) +with its environment, but that is hardly usable in \TEX. Think of code like: + +\starttyping +for e in xml.collected(xml.load('text.xml'),"title") do + -- e = the element that matched +end +\stoptyping + +The older variant is still supported but you can best use the previous variant. + +\starttyping +for r, d, k in xml.elements(xml.load('text.xml'),"title") do + -- r = root of the title element + -- d = data table + -- k = index in data table +end +\stoptyping + +Here \type {d[k]} points to the \type {title} element and in this case all titles +in the tree pass by. In practice this kind of code is encapsulated in function +calls, like those returning elements one by one, or returning the first or last +match. The result is then fed back into \TEX, possibly after being altered by an +associated setup. We've seen the wrappers to such functions already in a previous +chapter. + +In addition to the previously discussed expressions, one can add so called +filters to the expression, for instance: + +\starttyping +a/(b|c)/!d/e/text() +\stoptyping + +In a filter, the last part of the \cmdinternal {cd:lpath} expression is a function call. +The previous example returns the text of each element \type {e} that results from +matching the expression. When running \TEX\ the following functions are available. +Some are also also available when using pure \LUA. In \TEX\ you can often use one of +the macros like \type {\xmlfirst} instead of a \type {\xmlfilter} with finalizer +\type {first()}. The filter can be somewhat faster but that is hardly noticeable. + +\starttabulate[|l|l|p|] +\NC \type {context()} \NC string \NC the serialized text with \TEX\ catcode regime \NC \NR +%NC \type {ctxtext()} \NC string \NC \NC \NR +\NC \type {function()} \NC string \NC depends on the function \NC \NR +% +\NC \type {name()} \NC string \NC the (remapped) namespace \NC \NR +\NC \type {tag()} \NC string \NC the name of the element \NR +\NC \type {tags()} \NC list \NC the names of the element \NR +% +\NC \type {text()} \NC string \NC the serialized text \NC \NR +\NC \type {upper()} \NC string \NC the serialized text uppercased \NC \NR +\NC \type {lower()} \NC string \NC the serialized text lowercased \NC \NR +\NC \type {stripped()} \NC string \NC the serialized text stripped \NC \NR +\NC \type {lettered()} \NC string \NC the serialized text only letters (cf. \UNICODE) \NC \NR +% +\NC \type {count()} \NC number \NC the number of matches \NC \NR +\NC \type {index()} \NC number \NC the matched index in the current path \NC \NR +\NC \type {match()} \NC number \NC the matched index in the preceding path \NC \NR +% +%NC \type {lowerall()} \NC string \NC \NC \NR +%NC \type {upperall()} \NC string \NC \NC \NR +% +\NC \type {attribute(name)} \NC content \NC returns the attribute with the given name \NC \NR +\NC \type {chainattribute(name)} \NC content \NC sidem, but backtracks till one is found \NC \NR +\NC \type {command(name)} \NC content \NC expands the setup with the given name for each found element \NC \NR +\NC \type {position(n)} \NC content \NC processes the \type {n}\high{th} instance of the found element \NC \NR +\NC \type {all()} \NC content \NC processes all instances of the found element \NC \NR +%NC \type {default} \NC content \NC all \NC \NR +\NC \type {reverse()} \NC content \NC idem in reverse order \NC \NR +\NC \type {first()} \NC content \NC processes the first instance of the found element \NC \NR +\NC \type {last()} \NC content \NC processes the last instance of the found element \NC \NR +\NC \type {concat(...)} \NC content \NC concatinates the match \NC \NC \NR +\NC \type {concatrange(from,to,...)} \NC content \NC concatinates a range of matches \NC \NC \NR +\stoptabulate + +The extra arguments of the concatinators are: \type {separator} (string), \type +{lastseparator} (string) and \type {textonly} (a boolean). + +These filters are in fact \LUA\ functions which means that if needed more of them +can be added. Indeed this happens in some of the \XML\ related \MKIV\ modules, +for instance in the \MATHML\ processor. + +\stopsection + +\startsection[title={example}] + +The number of commands is rather large and if you want to avoid them this is +often possible. Take for instance: + +\starttyping +\xmlall{#1}{/a/b[position()>3]} +\stoptyping + +Alternatively you can use: + +\starttyping +\xmlfilter{#1}{/a/b[position()>3]/all()} +\stoptyping + +and actually this is also faster as internally it avoids a function call. Of +course in practice this is hardly measurable. + +In previous examples we've already seen quite some expressions, and it might be +good to point out that the syntax is modelled after \XSLT\ but is not quite the +same. The reason is that we started with a rather minimal system and have already +styles in use that depend on compatibility. + +\starttyping +namespace:// axis node(set) [expr 1]..[expr n] / ... / filter +\stoptyping + +When we are inside a \CONTEXT\ run, the namespace is \type {tex}. Hoewever, if +you want not to print back to \TEX\ you need to be more explicit. Say that we +typeset examns and have a (not that logical) structure like: + +\starttyping +<question> + <text>...</text> + <answer> + <item>one</item> + <item>two</item> + <item>three</item> + </answer> + <alternative> + <condition>true</condition> + <score>1</score> + </alternative> + <alternative> + <condition>false</condition> + <score>0</score> + </alternative> + <alternative> + <condition>true</condition> + <score>2</score> + </alternative> +</question> +\stoptyping + +Say that we typeset the questions with: + +\starttyping +\startxmlsetups question + \blank + score: \xmlfunction{#1}{totalscore} + \blank + \xmlfirst{#1}{text} + \startitemize + \xmlfilter{#1}{/answer/item/command(answer:item)} + \stopitemize + \endgraf + \blank +\stopxmlsetups +\stoptyping + +Each item in the answer results in a call to: + +\starttyping +\startxmlsetups answer:item + \startitem + \xmlflush{#1} + \endgraf + \xmlfilter{#1}{../../alternative[position()=rootposition()]/ + condition/command(answer:condition)} + \stopitem +\stopxmlsetups +\stoptyping + +\starttyping +\startxmlsetups answer:condition + \endgraf + condition: \xmlflush{#1} + \endgraf +\stopxmlsetups +\stoptyping + +Now, there are two rather special filters here. The first one involves +calculating the total score. As we look forward we use a function to deal with +this. + +\starttyping +\startluacode +function xml.functions.totalscore(root) + local score = 0 + for e in xml.collected(root,"/alternative") do + score = score + xml.filter(e,"xml:///score/number()") or 0 + end + tex.write(score) +end +\stopluacode +\stoptyping + +Watch how we use the namespace to keep the results at the \LUA\ end. + +The second special trick shown here is to limit a match using the current +position of the root (\type {#}) match. + +As you can see, a path expression can be more than just filtering a few nodes. At +the end of this manual you will find a bunch of examples. + +\stopsection + +\startsection[title={tables}] + +If you want to know how the internal \XML\ tables look you can print such a +table: + +\starttyping +print(table.serialize(e)) +\stoptyping + +This produces for instance: + +% s = xml.convert("<document><demo label='whatever'>some text</demo></document>") +% print(table.serialize(xml.filter(s,"demo")[1])) + +\starttyping +t={ + ["at"]={ + ["label"]="whatever", + }, + ["dt"]={ "some text" }, + ["ns"]="", + ["rn"]="", + ["tg"]="demo", +} +\stoptyping + +The \type {rn} entry is the renamed namespace (when renaming is applied). If you +see tags like \type {@pi@} this means that we don't have an element, but (in this +case) a processing instruction. + +\starttabulate[|l|p|] +\NC \type {@rt@} \NC the root element \NC \NR +\NC \type {@dd@} \NC document definition \NC \NR +\NC \type {@cm@} \NC comment, like \type {<!-- whatever -->} \NC \NR +\NC \type {@cd@} \NC so called \type {CDATA} \NC \NR +\NC \type {@pi@} \NC processing instruction, like \type {<?whatever we want ?>} \NC \NR +\stoptabulate + +There are many ways to deal with the content, but in the perspective of \TEX\ +only a few matter. + +\starttabulate[|l|p|] +\NC \type {xml.sprint(e)} \NC print the content to \TEX\ and apply setups if needed \NC \NR +\NC \type {xml.tprint(e)} \NC print the content to \TEX\ (serialize elements verbose) \NC \NR +\NC \type {xml.cprint(e)} \NC print the content to \TEX\ (used for special content) \NC \NR +\stoptabulate + +Keep in mind that anything low level that you uncover is not part of the official +interface unless mentioned in this manual. + +\stopsection + +\stopchapter + +\startchapter[title={Tips and tricks}] + +\startsection[title={Tracing}] + +It can be hard to debug code as much happens kind of behind the screens. +Therefore we have a couple of tracing options. Of course you can typeset some +status information, using for instance: + +\startxmlcmd {\cmdbasicsetup{xmlshow}} + typeset the tree given by \cmdinternal {cd:node} +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlinfo}} + typeset the name if the element given by \cmdinternal {cd:node} +\stopxmlcmd + +We also have a bunch of trackers that can be enabled, like: + +\starttyping +\enabletrackers[xml.show,xml.parse] +\stoptyping + +The full list (currently) is: + +\starttabulate[|lT|p|] +\NC xml.entities \NC show what entities are seen and replaced \NC \NR +\NC xml.path \NC show the result of parsing an lpath expression \NC \NR +\NC xml.parse \NC show stepwise resolving of expressions \NC \NR +\NC xml.profile \NC report all parsed lpath expressions (in the log) \NC \NR +\NC xml.remap \NC show what namespaces are remapped \NC \NR +\NC lxml.access \NC report errors with respect to resolving (symbolic) nodes \NC \NR +\NC lxml.comments \NC show the comments that are encountered (if at all) \NC \NR +\NC lxml.loading \NC show what files are loaded and converted \NC \NR +\NC lxml.setups \NC show what setups are being associated to elements \NC \NR +\stoptabulate + +In one of our workflows we produce books from \XML\ where the (educational) +content is organized in many small files. Each book has about 5~chapters and each +chapter is made of sections that contain text, exercises, resources, etc.\ and so +the document is assembled from thousands of files (don't worry, runtime inclusion +is pretty fast). In order to see where in the sources content resides we can +trace the filename. + +\startxmlcmd {\cmdbasicsetup{xmlinclusion}} + returns the file where the node comes from +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlinclusions}} + returns the list of files where the node comes from +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlinclusions}} + returns a list of files that were not included due to some problem +\stopxmlcmd + +Of course you have to make sure that these names end up somewhere visible, for +instance in the margin. + +\stopsection + +\startsection[title={Expansion}] + +For novice users the concept of expansion might sound frightening and to some +extend it is. However, it is important enough to spend some words on it here. + +It is good to realize that most setups are sort of immediate. When one setup is +issued, it can call another one and so on. Normally you won't notice that but +there are cases where that can be an problem. In \TEX\ you can define a macro, +take for instance: + +\starttyping +\startxmlsetups xml:foo + \def\foobar{\xmlfirst{#1}{/bar}} +\stopxmlsetups +\stoptyping + +you store the reference top node \type {bar} in \type {\foobar} maybe for later use. In +this case the content is not yet fetched, it will be done when \type {\foobar} is +called. + +\starttyping +\startxmlsetups xml:foo + \edef\foobar{\xmlfirst{#1}{/bar}} +\stopxmlsetups +\stoptyping + +Here the content of \type {bar} becomes the body of the macro. But what if +\type {bar} itself contains elements that also contain elements. When there +is a setup for \type {bar} it will be triggered and so on. + +When that setup looks like: + +\starttyping +\startxmlsetups xml:bar + \def\barfoo{\xmlflush{#1}} +\stopxmlsetups +\stoptyping + +Here we get something like: + +\starttyping +\foobar => {\def\barfoo{...}} +\stoptyping + +When \type {\barfoo} is not defined we get an error and when it is know and expands +to something weird we might also get an error. + +Especially when you don't know what content can show up, this can result in errors +when an expansion fails, for example because some macro being used is not defined. +To prevent this we can define a macro: + +\starttyping +\starttexdefinition unexpanded xml:bar:macro #1 + \def\barfoo{\xmlflush{#1}} +\stoptexdefinition + +\startxmlsetups xml:bar + \texdefinition{xml:bar:macro}{#1} +\stopxmlsetups +\stoptyping + +The setup \type {xml:bar} will still expand but the replacement text now is just the +call to the macro, think of: + +\starttyping +\foobar => {\texdefinition{xml:bar:macro}{#1}} +\stoptyping + +But this is often not needed, most \CONTEXT\ commands can handle the expansions +quite well but it's good to know that there is a away out. So, now to some +examples. Imagine that we have an \XML\ file that looks as follows: + +\starttyping +<?xml version='1.0' ?> +<demo> + <chapter> + <title>Some <em>short</em> title</title> + <content> + zeta + <index> + <key>zeta</key> + <content>zeta again</content> + </index> + alpha + <index> + <key>alpha</key> + <content>alpha <em>again</em></content> + </index> + gamma + <index> + <key>gamma</key> + <content>gamma</content> + </index> + beta + <index> + <key>beta</key> + <content>beta</content> + </index> + delta + <index> + <key>delta</key> + <content>delta</content> + </index> + done! + </content> + </chapter> +</demo> +\stoptyping + +There are a few structure related elements here: a chapter (with its list entry) +and some index entries. Both are multipass related and therefore travel around. +This means that when we let data end up in the auxiliary file, we need to make +sure that we end up with either expanded data (i.e.\ no references to the \XML\ +tree) or with robust forward and backward references to elements in the tree. + +Here we discuss three approaches (and more may show up later): pushing \XML\ into +the auxiliary file and using references to elements either or not with an +associated setup. We control the variants with a switch. + +\starttyping +\newcount\TestMode + +\TestMode=0 % expansion=xml +\TestMode=1 % expansion=yes, index, setup +\TestMode=2 % expansion=yes +\stoptyping + +We apply a couple of setups: + +\starttyping +\startxmlsetups xml:mysetups + \xmlsetsetup{\xmldocument}{demo|index|content|chapter|title|em}{xml:*} +\stopxmlsetups + +\xmlregistersetup{xml:mysetups} +\stoptyping + +The main document is processed with: + +\starttyping +\startxmlsetups xml:demo + \xmlflush{#1} + \subject{contents} + \placelist[chapter][criterium=all] + \subject{index} + \placeregister[index][criterium=all] + \page % else buffer is forgotten when placing header +\stopxmlsetups +\stoptyping + +First we show three alternative ways to deal with the chapter. The first case +expands the \XML\ reference so that we have an \XML\ stream in the auxiliary +file. This stream is processed as a small independent subfile when needed. The +second case registers a reference to the current element (\type {#1}). This means +that we have access to all data of this element, like attributes, title and +content. What happens depends on the given setup. The third variant does the same +but here the setup is part of the reference. + +\starttyping +\startxmlsetups xml:chapter + \ifcase \TestMode + % xml code travels around + \setuphead[chapter][expansion=xml] + \startchapter[title=eh: \xmltext{#1}{title}] + \xmlfirst{#1}{content} + \stopchapter + \or + % index is used for access via setup + \setuphead[chapter][expansion=yes,xmlsetup=xml:title:flush] + \startchapter[title=\xmlgetindex{#1}] + \xmlfirst{#1}{content} + \stopchapter + \or + % tex call to xml using index is used + \setuphead[chapter][expansion=yes] + \startchapter[title=hm: \xmlreference{#1}{xml:title:flush}] + \xmlfirst{#1}{content} + \stopchapter + \fi +\stopxmlsetups + +\startxmlsetups xml:title:flush + \xmltext{#1}{title} +\stopxmlsetups +\stoptyping + +We need to deal with emphasis and the content of the chapter. + +\starttyping +\startxmlsetups xml:em + \begingroup\em\xmlflush{#1}\endgroup +\stopxmlsetups + +\startxmlsetups xml:content + \xmlflush{#1} +\stopxmlsetups +\stoptyping + +A similar approach is followed with the index entries. Watch how we use the +numbered entries variant (in this case we could also have used just \type +{entries} and \type {keys}. + +\starttyping +\startxmlsetups xml:index + \ifcase \TestMode + \setupregister[index][expansion=xml,xmlsetup=] + \setstructurepageregister + [index] + [entries:1=\xmlfirst{#1}{content}, + keys:1=\xmltext{#1}{key}] + \or + \setupregister[index][expansion=yes,xmlsetup=xml:index:flush] + \setstructurepageregister + [index] + [entries:1=\xmlgetindex{#1}, + keys:1=\xmltext{#1}{key}] + \or + \setupregister[index][expansion=yes,xmlsetup=] + \setstructurepageregister + [index] + [entries:1=\xmlreference{#1}{xml:index:flush}, + keys:1=\xmltext{#1}{key}] + \fi +\stopxmlsetups + +\startxmlsetups xml:index:flush + \xmlfirst{#1}{content} +\stopxmlsetups +\stoptyping + +Instead of this flush, you can use the predefined setup \type {xml:flush} +unless it is overloaded by you. + +The file is processed by: + +\starttyping +\starttext + \xmlprocessfile{main}{test.xml}{} +\stoptext +\stoptyping + +We don't show the result here. If you're curious what the output is, you can test +it yourself. In that case it also makes sense to peek into the \type {test.tuc} +file to see how the information travels around. The \type {metadata} fields carry +information about how to process the data. + +The first case, the \XML\ expansion one, is somewhat special in the sense that +internally we use small pseudo files. You can control the rendering by tweaking +the following setups: + +\starttyping +\startxmlsetups xml:ctx:sectionentry + \xmlflush{#1} +\stopxmlsetups + +\startxmlsetups xml:ctx:registerentry + \xmlflush{#1} +\stopxmlsetups +\stoptyping + +{\em When these methods work out okay the other structural elements will be +dealt with in a similar way.} + +\stopsection + +\startsection[title={Special cases}] + +Normally the content will be flushed under a special (so called) catcode regime. +This means that characters that have a special meaning in \TEX\ will have no such +meaning in an \XML\ file. If you want content to be treated as \TEX\ code, you can +use one of the following: + +\startxmlcmd {\cmdbasicsetup{xmlflushcontext}} + flush the given \cmdinternal {cd:node} using the \TEX\ character + interpretation scheme +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlcontext}} + flush the match of \cmdinternal {cd:lpath} for the given \cmdinternal + {cd:node} using the \TEX\ character interpretation scheme +\stopxmlcmd + +We use this in cases like: + +\starttyping +.... + \xmlsetsetup {#1} { + tm|texformula| + } {xml:*} +.... + +\startxmlsetups xml:tm + \mathematics{\xmlflushcontext{#1}} +\stopxmlsetups + +\startxmlsetups xml:texformula + \placeformula\startformula\xmlflushcontext{#1}\stopformula +\stopxmlsetups +\stoptyping + +\stopsection + +\startsection[title={collecting}] + +Say that your document has + +\starttyping +<table> + <tr> + <td>foo</td> + <td>bar<td> + </tr> +</table> +\stoptyping + +And that you need to convert that to \TEX\ speak like: + +\starttyping +\bTABLE + \bTR + \bTD foo \eTD + \bTD bar \eTD + \eTR +\eTABLE +\stoptyping + +A simple mapping is: + +\starttyping +\startxmlsetups xml:table + \bTABLE \xmlflush{#1} \eTABLE +\stopxmlsetups +\startxmlsetups xml:tr + \bTR \xmlflush{#1} \eTR +\stopxmlsetups +\startxmlsetups xml:td + \bTD \xmlflush{#1} \eTD +\stopxmlsetups +\stoptyping + +The \type {\bTD} command is a so called delimited command which means that +it picks up its argument by looking for an \type {\eTD}. For a simple case +like here this works quite well because the flush is inside the pair. This +is not the case in the following variant: + +\starttyping +\startxmlsetups xml:td:start + \bTD +\stopxmlsetups +\startxmlsetups xml:td:stop + \eTD +\stopxmlsetups +\startxmlsetups xml:td + \xmlsetup{#1}{xml:td:start} + \xmlflush{#1} + \xmlsetup{#1}{xml:td:stop} +\stopxmlsetups +\stoptyping + +When for some reason \TEX\ gets confused you can revert to a mechanism that +collects content. + +\starttyping +\startxmlsetups xml:td:start + \startcollect + \bTD + \stopcollect +\stopxmlsetups +\startxmlsetups xml:td:stop + \startcollect + \eTD + \stopcollect +\stopxmlsetups +\startxmlsetups xml:td + \startcollecting + \xmlsetup{#1}{xml:td:start} + \xmlflush{#1} + \xmlsetup{#1}{xml:td:stop} + \stopcollecting +\stopxmlsetups +\stoptyping + +You can even implement solutions that effectively do this: + +\starttyping +\startcollecting + \startcollect \bTABLE \stopcollect + \startcollect \bTR \stopcollect + \startcollect \bTD \stopcollect + \startcollect foo\stopcollect + \startcollect \eTD \stopcollect + \startcollect \bTD \stopcollect + \startcollect bar\stopcollect + \startcollect \eTD \stopcollect + \startcollect \eTR \stopcollect + \startcollect \eTABLE \stopcollect +\stopcollecting +\stoptyping + +Of course you only need to go that complex when the situation demands it. Here is +another weird one: + +\starttyping +\startcollecting + \startcollect \setupsomething[\stopcollect + \startcollect foo=\stopcollect + \startcollect FOO,\stopcollect + \startcollect bar=\stopcollect + \startcollect BAR,\stopcollect + \startcollect ]\stopcollect +\stopcollecting +\stoptyping + +\stopsection + +\startsection[title={Selectors and injectors}] + +This chapter describes a bit special feature, one that we needed for a project +where we could not touch the original content but could add specific sections for +our own purpose. Hopefully the example demonstrates its useability. + +\enabletrackers[lxml.selectors] + +\startbuffer[foo] +<?xml version="1.0" encoding="UTF-8"?> + +<?context-directive message info 1: this is a demo file ?> +<?context-message-directive info 2: this is a demo file ?> + +<one> + <two> + <?context-select begin t1 t2 t3 ?> + <three> + t1 t2 t3 + <?context-directive injector crlf t1 ?> + t1 t2 t3 + </three> + <?context-select end ?> + <?context-select begin t4 ?> + <four> + t4 + </four> + <?context-select end ?> + <?context-select begin t8 ?> + <four> + t8.0 + t8.0 + </four> + <?context-select end ?> + <?context-include begin t4 ?> + <!-- + <three> + t4.t3 + <?context-directive injector crlf t1 ?> + t4.t3 + </three> + --> + <three> + t3 + <?context-directive injector crlf t1 ?> + t3 + </three> + <?context-include end ?> + <?context-select begin t8 ?> + <four> + t8.1 + t8.1 + </four> + <?context-select end ?> + <?context-select begin t8 ?> + <four> + t8.2 + t8.2 + </four> + <?context-select end ?> + <?context-select begin t4 ?> + <four> + t4 + t4 + </four> + <?context-select end ?> + <?context-directive injector page t7 t8 ?> + foo + <?context-directive injector blank t1 ?> + bar + <?context-directive injector page t7 t8 ?> + bar + </two> +</one> +\stopbuffer + +\typebuffer[foo] + +First we show how to plug in a directive. Processing instructions like the +following are normally ignored by an \XML\ processor, unless they make sense +to it. + +\starttyping +<?context-directive message info 1: this is a demo file ?> +<?context-message-directive info 2: this is a demo file ?> +\stoptyping + +We can define a message handler as follows: + +\startbuffer +\def\MyMessage#1#2#3{\writestatus{#1}{#2 #3}} + +\xmlinstalldirective{message}{MyMessage} +\stopbuffer + +\typebuffer \getbuffer + +When this file is process you will see this on the console: + +\startbuffer +info > 1: this is a demo file +info > 2: this is a demo file +\stopbuffer + +The file has some sections that can be used or ignored. The recipe for +obeying \type {t1} and \type {t4} is the following: + +\startbuffer +\xmlsetinjectors[t1] +\xmlsetinjectors[t4] + +\startxmlsetups xml:initialize + \xmlapplyselectors{#1} + \xmlsetsetup {#1} { + one|two|three|four + } {xml:*} +\stopxmlsetups + +\xmlregistersetup{xml:initialize} + +\startxmlsetups xml:one + [ONE \xmlflush{#1} ONE] +\stopxmlsetups + +\startxmlsetups xml:two + [TWO \xmlflush{#1} TWO] +\stopxmlsetups + +\startxmlsetups xml:three + [THREE \xmlflush{#1} THREE] +\stopxmlsetups + +\startxmlsetups xml:four + [FOUR \xmlflush{#1} FOUR] +\stopxmlsetups +\stopbuffer + +\typebuffer \getbuffer + +This typesets: + +\startnarrower +\xmlprocessbuffer{main}{foo}{} +\stopnarrower + +The include coding is kind of special: it permits adding content (in a comment) +and ignoring the rest so that we indeed can add something withou tinterfering +with the original. Of course in a normal workflow such messy solutions are +not needed, but alas, often workflows are not that clean, especially when one +has no real control over the source. + +\startxmlcmd {\cmdbasicsetup{xmlsetinjectors}} + enables a list of injectors that will be used +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlresetinjectors}} + resets the list of injectors +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlinjector}} + expands an injection (command); normally this one is only used + (in some setup) or for testing +\stopxmlcmd + +\startxmlcmd {\cmdbasicsetup{xmlapplyselectors}} + analyze the tree \cmdinternal {cd:node} for marked sections that + will be injected +\stopxmlcmd + +We have some injections predefined: + +\starttyping +\startsetups xml:directive:injector:page + \page +\stopsetups + +\startsetups xml:directive:injector:column + \column +\stopsetups + +\startsetups xml:directive:injector:blank + \blank +\stopsetups +\stoptyping + +In the example we see: + +\starttyping +<?context-directive injector page t7 t8 ?> +\stoptyping + +When we \type {\xmlsetinjector[t7]} a pagebreak will injected in that spot. Tags +like \type {t7}, \type {t8} etc.\ can represent versions. + +\stopsection + +\stopchapter + +\startchapter[title={Lookups using lpaths}] + +\startsection[title={introduction}] + +There is not that much system in the following examples. They resulted from tests +with different documents. The current implementation evolved out if the +experimental code. For instance, I decided to add the multiple expressions in row +handling after a few email exchanges with Jean|-|Michel Huffen. + +One of the main differences between the way \XSLT\ resolves a path and our way is +the anchor. Take: + +\starttyping +/something +something +\stoptyping + +The first one anchors in the current (!) element so it will only consider direct +children. The second one does a deep lookup and looks at the descendants as well. +Furthermore we have a few extra shortcuts like \type {**} in \type {a/**/b} which +represents all descendants. + +The expressions (between square brackets) has to be valid \LUA\ and some +preprocessing is done to resolve the built in functions. So, you might use code +like: + +\starttyping +my_lpeg_expression:match(text()) == "whatever" +\stoptyping + +given that \type {my_lpeg_expression} is known. In the examples below we use the +visualizer to show the steps. Some are shown more than once as part of a set. + +\stopsection + +\startsection[title={special cases}] + +\xmllshow{} +\xmllshow{*} +\xmllshow{.} +\xmllshow{/} + +\stopsection + +\startsection[title={wildcards}] + +\xmllshow{*} +\xmllshow{*:*} +\xmllshow{/*} +\xmllshow{/*:*} +\xmllshow{*/*} +\xmllshow{*:*/*:*} + +\xmllshow{a/*} +\xmllshow{a/*:*} +\xmllshow{/a/*} +\xmllshow{/a/*:*} + +\xmllshow{/*} +\xmllshow{/**} +\xmllshow{/***} + +\stopsection + +\startsection[title={multiple steps}] + +\xmllshow{answer} +\xmllshow{answer/test/*} +\xmllshow{answer/test/child::} +\xmllshow{answer/*} +\xmllshow{answer/*[tag()='p' and position()=1 and text()!='']} + +\stopsection + +\startsection[title={pitfals}] + +\xmllshow{[oneof(lower(@encoding),'tex','context','ctx')]} +\xmllshow{.[oneof(lower(@encoding),'tex','context','ctx')]} + +\stopsection + +\startsection[title={more special cases}] + +\xmllshow{**} +\xmllshow{*} +\xmllshow{..} +\xmllshow{.} +\xmllshow{//} +\xmllshow{/} + +\xmllshow{**/} +\xmllshow{**/*} +\xmllshow{**/.} +\xmllshow{**//} + +\xmllshow{*/} +\xmllshow{*/*} +\xmllshow{*/.} +\xmllshow{*//} + +\xmllshow{/**/} +\xmllshow{/**/*} +\xmllshow{/**/.} +\xmllshow{/**//} + +\xmllshow{/*/} +\xmllshow{/*/*} +\xmllshow{/*/.} +\xmllshow{/*//} + +\xmllshow{./} +\xmllshow{./*} +\xmllshow{./.} +\xmllshow{.//} + +\xmllshow{../} +\xmllshow{../*} +\xmllshow{../.} +\xmllshow{..//} + +\stopsection + +\startsection[title={more wildcards}] + +\xmllshow{one//two} +\xmllshow{one/*/two} +\xmllshow{one/**/two} +\xmllshow{one/***/two} +\xmllshow{one/x//two} +\xmllshow{one//x/two} +\xmllshow{//x/two} + +\stopsection + +\startsection[title={special axis}] + +\xmllshow{descendant::whocares/ancestor::whoknows} +\xmllshow{descendant::whocares/ancestor::whoknows/parent::} +\xmllshow{descendant::whocares/ancestor::} +\xmllshow{child::something/child::whatever/child::whocares} +\xmllshow{child::something/child::whatever/child::whocares|whoknows} +\xmllshow{child::something/child::whatever/child::(whocares|whoknows)} +\xmllshow{child::something/child::whatever/child::!(whocares|whoknows)} +\xmllshow{child::something/child::whatever/child::(whocares)} +\xmllshow{child::something/child::whatever/child::(whocares)[position()>2]} +\xmllshow{child::something/child::whatever[position()>2][position()=1]} +\xmllshow{child::something/child::whatever[whocares][whocaresnot]} +\xmllshow{child::something/child::whatever[whocares][not(whocaresnot)]} +\xmllshow{child::something/child::whatever/self::whatever} + +There is also \type {last-match::} that starts with the last found set of nodes. +This can save some runtime when you do lots of tests combined with a same check +afterwards. + +\stopsection + +\startsection[title={some more examples}] + +\xmllshow{/something/whatever} +\xmllshow{something/whatever} +\xmllshow{/**/whocares} +\xmllshow{whoknows/whocares} +\xmllshow{whoknows} +\xmllshow{whocares[contains(text(),'f') or contains(text(),'g')]} +\xmllshow{whocares/first()} +\xmllshow{whocares/last()} +\xmllshow{whatever/all()} +\xmllshow{whocares/position(2)} +\xmllshow{whocares/position(-2)} +\xmllshow{whocares[1]} +\xmllshow{whocares[-1]} +\xmllshow{whocares[2]} +\xmllshow{whocares[-2]} +\xmllshow{whatever[3]/attribute(id)} +\xmllshow{whatever[2]/attribute('id')} +\xmllshow{whatever[3]/text()} +\xmllshow{/whocares/first()} +\xmllshow{/whocares/last()} + +\xmllshow{xml://whatever/all()} +\xmllshow{whatever/all()} +\xmllshow{//whocares} +\xmllshow{..[2]} +\xmllshow{../*[2]} + +\xmllshow{/(whocares|whocaresnot)} +\xmllshow{/!(whocares|whocaresnot)} +\xmllshow{/!whocares} + +\xmllshow{/interface/command/command(xml:setups:register)} +\xmllshow{/interface/command[@name='xxx']/command(xml:setups:typeset)} +\xmllshow{/arguments/*} +\xmllshow{/sequence/first()} +\xmllshow{/arguments/text()} +\xmllshow{/sequence/variable/first()} +\xmllshow{/interface/define[@name='xxx']/first()} +\xmllshow{/parameter/command(xml:setups:parameter:measure)} + +\xmllshow{/(*:library|figurelibrary)/*:figure/*:label} +\xmllshow{/(*:library|figurelibrary)/figure/*:label} +\xmllshow{/(*:library|figurelibrary)/figure/label} +\xmllshow{/(*:library|figurelibrary)/figure:*/label} + +\xmlshow {whatever//br[tag(1)='br']} + +\stopsection + +\stopchapter + +\startchapter[title=Examples] + +\startsection[title=attribute chains] + +In \CSS, when an attribute is not present, the parent element is checked, and when +not found again, the lookup follows the chain till a match is found or the root is +reached. The following example demonstrates how such a chain lookup works. + +\startbuffer[test] +<something mine="1" test="one" more="alpha"> + <whatever mine="2" test="two"> + <whocares mine="3"> + <!-- this is a test --> + </whocares> + </whatever> +</something> +\stopbuffer + +\typebuffer[test] + +We apply the following setups to this tree: + +\startbuffer[setups] +\startxmlsetups xml:common + [ + \xmlchainatt{#1}{mine}, + \xmlchainatt{#1}{test}, + \xmlchainatt{#1}{more}, + \xmlchainatt{#1}{none} + ]\par +\stopxmlsetups + +\startxmlsetups xml:something + something: \xmlsetup{#1}{xml:common} + \xmlflush{#1} +\stopxmlsetups + +\startxmlsetups xml:whatever + whatever: \xmlsetup{#1}{xml:common} + \xmlflush{#1} +\stopxmlsetups + +\startxmlsetups xml:whocares + whocares: \xmlsetup{#1}{xml:common} + \xmlflush{#1} +\stopxmlsetups + +\startxmlsetups xml:mysetups + \xmlsetsetup{#1}{something|whatever|whocares}{xml:*} +\stopxmlsetups + +\xmlregisterdocumentsetup{example-1}{xml:mysetups} + +\xmlprocessbuffer{example-1}{test}{} +\stopbuffer + +\typebuffer[setups] + +This gives: + +\start + \getbuffer[setups] +\stop + +\stopsection + +\startsection[title=Conditional setups] + +Say that we have this code: + +\starttyping +\xmldoifelse {#1} {/what[@a='1']} { + \xmlfilter {#1} {/what/command('xml:yes')} +} { + \xmlfilter {#1} {/what/command('xml:nop')} +} +\stoptyping + +Here we first determine if there is a child \type {what} with attribute \type {a} +set to \type {1}. Depending on the outcome again we check the child nodes for +being named \type {what}. A faster solution which also takes less code is this: + +\starttyping +\xmlfilter {#1} {/what[@a='1']/command('xml:yes','xml:nop')} +\stoptyping + +\stopsection + +\startsection[title=Manipulating] + +Assume that we have the following \XML\ data: + +\startbuffer[test] +<A> + <B>right</B> + <B>wrong</B> +</A> +\stopbuffer + +\typebuffer[test] + +But, instead of \type {right} we want to see \type {okay}. We can do that with a +finalizer: + +\startbuffer +\startluacode +local rehash = { + ["right"] = "okay", +} + +function xml.finalizers.tex.Okayed(collected,what) + for i=1,#collected do + if what == "all" then + local str = xml.text(collected[i]) + context(rehash[str] or str) + else + context(str) + end + end +end +\stopluacode +\stopbuffer + +\typebuffer \getbuffer + +\startbuffer +\startxmlsetups xml:A + \xmlflush{#1} +\stopxmlsetups + +\startxmlsetups xml:B + (It's \xmlfilter{#1}{./Okayed("all")}) +\stopxmlsetups + +\startxmlsetups xml:testsetups + \xmlsetsetup{#1}{A|B}{xml:*} +\stopxmlsetups + +\xmlregisterdocumentsetup{example-2}{xml:testsetups} +\xmlprocessbuffer{example-2}{test}{} +\stopbuffer + +\typebuffer + +The result is: \start \inlinebuffer \stop + +\stopsection + +\startsection[title=Cross referencing] + +A rather common way to add cross references to \XML\ files is to borrow the +asymmetrical id's from \HTML. This means that one cannot simply use a value +of (say) \type {href} to locate an \type {id}. The next example came up on +the \CONTEXT\ mailing list. + +\startbuffer[test] +<doc> + <p>Text + <a href="#fn1" class="footnoteref" id="fnref1"><sup>1</sup></a> and + <a href="#fn2" class="footnoteref" id="fnref2"><sup>2</sup></a> + </p> + <div class="footnotes"> + <hr /> + <ol> + <li id="fn1"><p>A footnote.<a href="#fnref1">↩</a></p></li> + <li id="fn2"><p>A second footnote.<a href="#fnref2">↩</a></p></li> + </ol> + </div> +</doc> +\stopbuffer + +\typebuffer[test] + +We give two variants for dealing with such references. The first solution does +lookups and depending on the size of the file can be somewhat inefficient. + +\startbuffer +\startxmlsetups xml:doc + \blank + \xmlflush{#1} + \blank +\stopxmlsetups + +\startxmlsetups xml:p + \xmlflush{#1} +\stopxmlsetups + +\startxmlsetups xml:footnote + (variant 1)\footnote + {\xmlfirst + {example-3-1} + {div[@class='footnotes']/ol/li[@id='\xmlrefatt{#1}{href}']}} +\stopxmlsetups + +\startxmlsetups xml:initialize + \xmlsetsetup{#1}{p|doc}{xml:*} + \xmlsetsetup{#1}{a[@class='footnoteref']}{xml:footnote} + \xmlsetsetup{#1}{div[@class='footnotes']}{xml:nothing} +\stopxmlsetups + +\xmlresetdocumentsetups{*} +\xmlregisterdocumentsetup{example-3-1}{xml:initialize} + +\xmlprocessbuffer{example-3-1}{test}{} +\stopbuffer + +\typebuffer + +This will typeset two footnotes. + +\getbuffer + +The second variant collects the references so that the tiem spend on lookups is +less. + +\startbuffer +\startxmlsetups xml:doc + \blank + \xmlflush{#1} + \blank +\stopxmlsetups + +\startxmlsetups xml:p + \xmlflush{#1} +\stopxmlsetups + +\startluacode + userdata.notes = {} +\stopluacode + +\startxmlsetups xml:collectnotes + \ctxlua{userdata.notes['\xmlrefatt{#1}{id}'] = '#1'} +\stopxmlsetups + +\startxmlsetups xml:footnote + (variant 2)\footnote + {\xmlflush + {\cldcontext{userdata.notes['\xmlrefatt{#1}{href}']}}} +\stopxmlsetups + +\startxmlsetups xml:initialize + \xmlsetsetup{#1}{p|doc}{xml:*} + \xmlsetsetup{#1}{a[@class='footnoteref']}{xml:footnote} + \xmlfilter{#1}{div[@class='footnotes']/ol/li/command(xml:collectnotes)} + \xmlsetsetup{#1}{div[@class='footnotes']}{} +\stopxmlsetups + +\xmlregisterdocumentsetup{example-3-2}{xml:initialize} + +\xmlprocessbuffer{example-3-2}{test}{} +\stopbuffer + +\typebuffer + +This will again typeset two footnotes: + +\getbuffer + +\stopsection + +\startsection[title=mapping values] + +One way to process options \type {frame} in the example below is to map the +values to values known by \CONTEXT. + +\startbuffer[test] +<a> + <nattable frame="on"> + <tr><td>#1</td><td>#2</td><td>#3</td><td>#4</td></tr> + <tr><td>#5</td><td>#6</td><td>#7</td><td>#8</td></tr> + </nattable> + <nattable frame="off"> + <tr><td>#1</td><td>#2</td><td>#3</td><td>#4</td></tr> + <tr><td>#5</td><td>#6</td><td>#7</td><td>#8</td></tr> + </nattable> + <nattable frame="no"> + <tr><td>#1</td><td>#2</td><td>#3</td><td>#4</td></tr> + <tr><td>#5</td><td>#6</td><td>#7</td><td>#8</td></tr> + </nattable> +</a> +\stopbuffer + +\typebuffer[test] + +\startbuffer +\startxmlsetups xml:a + \xmlflush{#1} +\stopxmlsetups + +\xmlmapvalue {nattable:frame} {on} {on} +\xmlmapvalue {nattable:frame} {yes} {on} +\xmlmapvalue {nattable:frame} {off} {off} +\xmlmapvalue {nattable:frame} {no} {off} + +\startxmlsetups xml:nattable + \startplacetable[title=#1] + \setupTABLE[frame=\xmlval{nattable:frame}{\xmlatt{#1}{frame}}{on}]% + \bTABLE + \xmlflush{#1} + \eTABLE + \stopplacetable +\stopxmlsetups + +\startxmlsetups xml:tr + \bTR + \xmlflush{#1} + \eTR +\stopxmlsetups + +\startxmlsetups xml:td + \bTD + \xmlflush{#1} + \eTD +\stopxmlsetups + +\startxmlsetups xml:testsetups + \xmlsetsetup{example-4}{a|nattable|tr|td|}{xml:*} +\stopxmlsetups + +\xmlregisterdocumentsetup{example-4}{xml:testsetups} + +\xmlprocessbuffer{example-4}{test}{} +\stopbuffer + +The \type {\xmlmapvalue} mechanism is rather efficient and involves a minimum +of testing. + +\typebuffer + +We get: + +\getbuffer + +\stopsection + +\startsection[title=using \LUA] + +In this example we demonstrate how you can delegate rendering to \LUA. We +will construct a so called extreme table. The input is: + +\startbuffer[demo] +<?xml version="1.0" encoding="utf-8"?> + +<a> + <b> <c>1</c> <d>Text</d> </b> + <b> <c>2</c> <d>More text</d> </b> + <b> <c>2</c> <d>Even more text</d> </b> + <b> <c>2</c> <d>And more</d> </b> + <b> <c>3</c> <d>And even more</d> </b> + <b> <c>2</c> <d>The last text</d> </b> +</a> +\stopbuffer + +\typebuffer[demo] + +The processor code is: + +\startbuffer[process] +\startxmlsetups xml:test_setups + \xmlsetsetup{#1}{a|b|c|d}{xml:*} +\stopxmlsetups + +\xmlregisterdocumentsetup{example-5}{xml:test_setups} + +\xmlprocessbuffer{example-5}{demo}{} +\stopbuffer + +\typebuffer + +We color a sequence of the same titles (numbers here) in red. The first +solution remembers the last title: + +\startbuffer +\startxmlsetups xml:a + \startembeddedxtable + \xmlflush{#1} + \stopembeddedxtable +\stopxmlsetups + +\startxmlsetups xml:b + \xmlfunction{#1}{test_ba} +\stopxmlsetups + +\startluacode +local lasttitle = nil + +function xml.functions.test_ba(t) + local title = xml.text(t, "/c") + local content = xml.text(t, "/d") + context.startxrow() + context.startxcell { + background = "color", + backgroundcolor = lasttitle == title and "colorone" or "colortwo", + foregroundstyle = "bold", + foregroundcolor = "white", + } + context(title) + lasttitle = title + context.stopxcell() + context.startxcell() + context(content) + context.stopxcell() + context.stopxrow() +end +\stopluacode +\stopbuffer + +\typebuffer + +The \type {embeddedxtable} environment is needed because the table is picked up +as argument. + +\getbuffer \getbuffer[process] + +The second implemetation remembers what titles are already processed so here we +can color the last one too. + +\startbuffer +\startxmlsetups xml:a + \ctxlua{xml.functions.reset_bb()} + \startembeddedxtable + \xmlflush{#1} + \stopembeddedxtable +\stopxmlsetups + +\startxmlsetups xml:b + \xmlfunction{#1}{test_bb} +\stopxmlsetups + +\startluacode +local titles + +function xml.functions.reset_bb(t) + titles = { } +end + +function xml.functions.test_bb(t) + local title = xml.text(t, "/c") + local content = xml.text(t, "/d") + context.startxrow() + context.startxcell { + background = "color", + backgroundcolor = titles[title] and "colorone" or "colortwo", + foregroundstyle = "bold", + foregroundcolor = "white", + } + context(title) + titles[title] = true + context.stopxcell() + context.startxcell() + context(content) + context.stopxcell() + context.stopxrow() +end +\stopluacode +\stopbuffer + +\typebuffer \getbuffer \getbuffer[process] + +A solution without any state variable is given below. + +\startbuffer +\startxmlsetups xml:a + \startembeddedxtable + \xmlflush{#1} + \stopembeddedxtable +\stopxmlsetups + +\startxmlsetups xml:b + \xmlfunction{#1}{test_bc} +\stopxmlsetups + +\startluacode +function xml.functions.test_bc(t) + local title = xml.text(t, "/c") + local content = xml.text(t, "/d") + context.startxrow() + local okay = xml.text(t,"./preceding-sibling::/[-1]") == title + context.startxcell { + background = "color", + backgroundcolor = okay and "colorone" or "colortwo", + foregroundstyle = "bold", + foregroundcolor = "white", + } + context(title) + context.stopxcell() + context.startxcell() + context(content) + context.stopxcell() + context.stopxrow() +end +\stopluacode +\stopbuffer + +\typebuffer \getbuffer \getbuffer[process] + +Here is a solution that delegates even more to \LUA. The previous variants were +actually not that safe with repect to special characters and didn't handle +nested elements either but the next one does. + +\startbuffer[demo] +<?xml version="1.0" encoding="utf-8"?> + +<a> + <b> <c>#1</c> <d>Text</d> </b> + <b> <c>#2</c> <d>More text</d> </b> + <b> <c>#2</c> <d>Even more text</d> </b> + <b> <c>#2</c> <d>And more</d> </b> + <b> <c>#3</c> <d>And even more</d> </b> + <b> <c>#2</c> <d>Something <i>nested</i> </d> </b> +</a> +\stopbuffer + +\typebuffer[demo] + +We also need to map the \type {i} element. + +\startbuffer +\startxmlsetups xml:a + \starttexcode + \xmlfunction{#1}{test_a} + \stoptexcode +\stopxmlsetups + +\startxmlsetups xml:c + \xmlflush{#1} +\stopxmlsetups + +\startxmlsetups xml:d + \xmlflush{#1} +\stopxmlsetups + +\startxmlsetups xml:i + {\em\xmlflush{#1}} +\stopxmlsetups + +\startluacode +function xml.functions.test_a(t) + context.startxtable() + local previous = false + for b in xml.collected(lxml.getid(t),"/b") do + context.startxrow() + local current = xml.text(b,"/c") + context.startxcell { + background = "color", + backgroundcolor = (previous == current) and "colorone" or "colortwo", + foregroundstyle = "bold", + foregroundcolor = "white", + } + lxml.first(b,"/c") + context.stopxcell() + context.startxcell() + lxml.first(b,"/d") + context.stopxcell() + previous = current + context.stopxrow() + end + context.stopxtable() +end +\stopluacode + +\startxmlsetups xml:test_setups + \xmlsetsetup{#1}{a|b|c|d|i}{xml:*} +\stopxmlsetups + +\xmlregisterdocumentsetup{example-5}{xml:test_setups} + +\xmlprocessbuffer{example-5}{demo}{} +\stopbuffer + +\typebuffer \getbuffer + +The question is, do we really need \LUA ? Often we don't, apart maybe from an +occasional special finalizer. A pure \TEX\ solution is given next: + +\startbuffer +\startxmlsetups xml:a + \glet\MyPreviousTitle\empty + \glet\MyCurrentTitle \empty + \startembeddedxtable + \xmlflush{#1} + \stopembeddedxtable +\stopxmlsetups + +\startxmlsetups xml:b + \startxrow + \xmlflush{#1} + \stopxrow +\stopxmlsetups + +\startxmlsetups xml:c + \xdef\MyCurrentTitle{\xmltext{#1}{.}} + \doifelse {\MyPreviousTitle} {\MyCurrentTitle} { + \startxcell + [background=color, + backgroundcolor=colorone, + foregroundstyle=bold, + foregroundcolor=white] + } { + \glet\MyPreviousTitle\MyCurrentTitle + \startxcell + [background=color, + backgroundcolor=colortwo, + foregroundstyle=bold, + foregroundcolor=white] + } + \xmlflush{#1} + \stopxcell +\stopxmlsetups + +\startxmlsetups xml:d + \startxcell + \xmlflush{#1} + \stopxcell +\stopxmlsetups + +\startxmlsetups xml:i + {\em\xmlflush{#1}} +\stopxmlsetups + +\startxmlsetups xml:test_setups + \xmlsetsetup{#1}{*}{xml:*} +\stopxmlsetups + +\xmlregisterdocumentsetup{example-5}{xml:test_setups} + +\xmlprocessbuffer{example-5}{demo}{} +\stopbuffer + +\typebuffer \getbuffer + +You can even save a few lines of code: + +\starttyping +\startxmlsetups xml:c + \xdef\MyCurrentTitle{\xmltext{#1}{.}} + \startxcell + [background=color, + backgroundcolor=color\ifx\MyPreviousTitle\MyCurrentTitle one\else two\fi, + foregroundstyle=bold, + foregroundcolor=white] + \xmlflush{#1} + \stopxcell + \glet\MyPreviousTitle\MyCurrentTitle +\stopxmlsetups +\stoptyping + +Or if you prefer: + +\startxmlsetups xml:c + \xdef\MyCurrentTitle{\xmltext{#1}{.}} + \doifelse {\MyPreviousTitle} {\MyCurrentTitle} { + \xmlsetup{#1}{xml:c:one} + } { + \xmlsetup{#1}{xml:c:two} + } +\stopxmlsetups + +\startxmlsetups xml:c:one + \startxcell + [background=color, + backgroundcolor=colorone, + foregroundstyle=bold, + foregroundcolor=white] + \xmlflush{#1} + \stopxcell +\stopxmlsetups + +\startxmlsetups xml:c:two + \startxcell + [background=color, + backgroundcolor=colortwo, + foregroundstyle=bold, + foregroundcolor=white] + \xmlflush{#1} + \stopxcell + \global\let\MyPreviousTitle\MyCurrentTitle +\stopxmlsetups + +These examples demonstrate that it doesn't hurt to know a little bit of \TEX\ +programming: defining macros and basic comparisons can come in handy. There are +examples in the test suite, you can peek in the source code, you can consult +the wiki or you can just ask on the list. + +\stopsection + +\startsection[title=Last match] + +For the next example we use the following \XML\ input: + +\startbuffer[demo] +<?xml version "1.0"?> +<document> + <section id="1"> + <content> + <p>first</p> + <p>second</p> + </content> + </section> + <section id="2"> + <content> + <p>third</p> + <p>fourth</p> + </content> + </section> +</document> +\stopbuffer + +\typebuffer[demo] + +If you check if some element is present and then act accordingly, you can +end up with doing the same lookup twice. Although it might sound inefficient, +in practice it's often not measureable. + +\startbuffer +\startxmlsetups xml:demo:document + \type{\xmlall{#1}{/section[@id='2']/content/p}}\par + \xmldoif{#1}{/section[@id='2']/content/p} { + \xmlall{#1}{/section[@id='2']/content/p} + } + \type{\xmllastmatch}\par + \xmldoif{#1}{/section[@id='2']/content/p} { + \xmllastmatch + } + \type{\xmlall{#1}{last-match::}}\par + \xmldoif{#1}{/section[@id='2']/content/p} { + \xmlall{#1}{last-match::} + } + \type{\xmlfilter{#1}{last-match::/command(xml:demo:p)}}\par + \xmldoif{#1}{/section[@id='2']/content/p} { + \xmlfilter{#1}{last-match::/command(xml:demo:p)} + } +\stopxmlsetups + +\startxmlsetups xml:demo:p + \quad\xmlflush{#1}\endgraf +\stopxmlsetups + +\startxmlsetups xml:demo:base + \xmlsetsetup{#1}{document|p}{xml:demo:*} +\stopxmlsetups + +\xmlregisterdocumentsetup{example-6}{xml:demo:base} + +\xmlprocessbuffer{example-6}{demo}{} +\stopbuffer + +\typebuffer + +In the second check we just flush the last match, so effective we do an \type +{\xmlall} here. The third and fourth alternatives demonstrate how we can use +\type {last-match} as axis. The gain is 10\% or more on the lookup but of course +typesetting often takes relatively more time than the lookup. + +\startpacked +\getbuffer +\stoppacked + +\stopsection + +\stopchapter + +\stopbodymatter + +\stoptext |