From cf6361d25d30139053d6a2e54e90e00210df7dd2 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sat, 25 Feb 2012 14:17:19 +0100 Subject: =?UTF-8?q?implemented=20and=20documented=20the=20=E2=80=9Cother?= =?UTF-8?q?=5Fchars=E2=80=9D=20switch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/context/third/enigma/enigma_manual.tex | 180 +++++++++++++++++----- scripts/context/lua/third/enigma/mtx-t-enigma.lua | 8 +- tex/context/third/enigma/enigma.lua | 100 ++++++++---- tex/context/third/enigma/t-enigma.mkvi | 10 +- 4 files changed, 223 insertions(+), 75 deletions(-) diff --git a/doc/context/third/enigma/enigma_manual.tex b/doc/context/third/enigma/enigma_manual.tex index d136dc1..75e6936 100644 --- a/doc/context/third/enigma/enigma_manual.tex +++ b/doc/context/third/enigma/enigma_manual.tex @@ -4,6 +4,59 @@ \TODO{instuctions for plain, latex + ctx} \stopdocsection +\startdocsection[title=Options Explained] + \TODO{day key syntax, rotor settings strings etc.} + +%%% other_chars +Most documents don’t naturally adhere to the machine-imposed restriction +to the 26 letters of the Latin alphabet. The original encipherment +directives comprised substitution tables to compensate for a set of intrinsic +peculiarities of the German language, like umlauts and common digraphs. +The \emph{Enigma} simulation module strives to apply these automatically +but there is no guarantee of completeness. + +However, the Enigma lacks means of handling languages other than German. +When the substitution lookup fails, there are two ways of proceeding: +either to ignore the current character or to pass it on to the output as +if nothing happened. The default behaviour is to drop alien letters and +move on. If the user intends to keep these foreign characters instead, +E can achieve this by setting the \identifier{other_chars} key in the +Enigma setup to the value \emph{true}. 
An example of how the result of +both methods may look, other things being equal, is given in the +listing below (example for \CONTEXT). + +\startcontexttyping +\usemodule [enigma] +\defineenigma [secretmessage] +\setupenigma [secretmessage] [ + other_chars = yes, + day_key = B V III II 12 03 01 GI JV KZ WM PU QY AD CN ET FL, + rotor_setting = ben, +] + +\defineenigma [othermessage] [secretmessage] +\setupenigma [othermessage] [other_chars=wrong] + +\starttext + +\startsecretmessage + føo bąr baž +\stopsecretmessage +\startothermessage + føo bąr baž +\stopothermessage + +\stoptext +\stopcontexttyping + +Both methods have their disadvantages: if the user chooses to have the +unknown characters removed it might distort the decrypted text to the point of +becoming illegible. Far more serious, however, are the consequences of +keeping them. As artefacts in the ciphertext they would convey +information about the structure of the plain text. + +\stopdocsection + \startdocsection[title=Basic Functionality] Encrypt the text of your document using the script interface. For @@ -11,8 +64,7 @@ a start try out the settings as given in below listing. \starttyping mtxrun --script mtx-t-enigma \ - --setup="other_chars = ok, \ - day_key = B I II III 01 01 01, \ + --setup="day_key = B I II III 01 01 01, \ rotor_setting = xyz, \ verbose=0" \ --text="Gentlemen don’t read each other’s mail, Mr. Turing\!" @@ -27,7 +79,6 @@ document. \usemodule[enigma] \defineenigma [secretmessage] \setupenigma [secretmessage] [ - other_chars = ok, day_key = B I II III 01 01 01, rotor_setting = xyz, verbose=3, @@ -47,40 +98,6 @@ reappear. Notice that punctuation is substituted with the letter “x” before encryption and that spaces are omitted. -\stopdocsection - -\startdocsection[title=Uses and Abuses] - -In \LUATEX, \identifier{callback}s may stack. This allows filtering the -input through many enigma machines successively. 
For instance, in the -following listing, two instances of the same machine are generated and -applied. - -\startcontexttyping -\usemodule[enigma] %% load the module -\defineenigma [secretmessage] %% generate and -\setupenigma [secretmessage] [ %% configure a machine - other_chars = ok, - day_key = B IV V II 01 01 01 AD CN ET FL GI JV KZ PU QY WX, - rotor_setting = foo, - verbose=3, -] - -%% now, copy the first machine’s settings -\defineenigma [othermessage] [secretmessage] - -%% here we go! -\starttext - -\startothermessage %% enable machine 1 -\startsecretmessage %% enable machine 2 while no 1 is active -Encryption equals decryption. -\stopothermessage -\stopsecretmessage - -\stoptext \endinput -\stopcontexttyping - \useURL[kgbuch] [http://de.wikipedia.org/wiki/Kenngruppenbuch] [] [code book] Now it’s certainly not wise to carry around the key to encrypted @@ -124,13 +141,94 @@ She subsequently mails this file to Bob and conveys the key through a secure channel. They only thing that will be left for Bob to do now, is to enter the key at the prompt when compiling the document with \LUALATEX. +\stopdocsection + +\startdocsection[title=Uses and Abuses] + +In \LUATEX, \identifier{callback}s may stack. This allows filtering the +input through many enigma machines successively. For instance, in the +following listing, two instances of the same machine are generated and +applied. + +\startcontexttyping +\usemodule[enigma] %% load the module +\defineenigma [secretmessage] %% generate and +\setupenigma [secretmessage] [ %% configure a machine + day_key = B IV V II 01 01 01 AD CN ET FL GI JV KZ PU QY WX, + rotor_setting = foo, + verbose=3, +] + +%% now, copy the first machine’s settings +\defineenigma [othermessage] [secretmessage] + +%% here we go! +\starttext + +\startothermessage %% enable machine 1 +\startsecretmessage %% enable machine 2 while no 1 is active +Encryption equals decryption. 
+\stopothermessage +\stopsecretmessage + +\stoptext \endinput +\stopcontexttyping \stopdocsection \stopdocchapter -\startdocchapter[title=Acknowledgements] -The Enigma module was inspired by Arno Trautmann’s -\identifier{chickenize} package. -Without \LUATEX, encryption on node-level would not have been possible. +\startdocchapter[title=Metadata] +\startdocsection[title=License] + +© 2012 \emph{Philipp Gesang}. All rights reserved. + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the following +conditions are met: + +\startitemize[n] + \item Redistributions of source code must retain the above + copyright notice, this list of conditions and the following + disclaimer. + \item Redistributions in binary form must reproduce the + above copyright notice, this list of conditions and the + following disclaimer in the documentation and/or other + materials provided with the distribution. +\stopitemize + +\begingroup +\setuptolerance [horizontal,strict] +\startalignment [right,nothyphenated] +\noindentation\startsmallcaps + this software is provided by the copyright + holder “as is” and any express or implied warranties, + including, but not limited to, the implied warranties of + merchantability and fitness for a particular purpose are + disclaimed. in no event shall the copyright holder or + contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but + not limited to, procurement of substitute goods or services; + loss of use, data, or profits; or business interruption) + however caused and on any theory of liability, whether in + contract, strict liability, or tort (including negligence or + otherwise) arising in any way out of the use of this software, + even if advised of the possibility of such damage. 
+\stopsmallcaps\endgraf +\stopalignment +\endgroup +\stopdocsection + +\startdocsection[title=Acknowledgements] +The idea to implement the \emph{Enigma} cipher for \TEX\ came up while I +was reading \emph{The Code Book} by Simon Singh. +This work contains an excellent portrayal of the history of German +military cryptography and Allied cryptanalysis before and during the +Second World War. +Also, the Enigma module drew lots of inspiration from Arno Trautmann’s +\identifier{chickenize} package, which remains the unsurpassed hands-on +introduction to callback trickery. +Finally, without \LUATEX\ encryption on node-level would not have been +possible. +\stopdocsection \stopdocchapter diff --git a/scripts/context/lua/third/enigma/mtx-t-enigma.lua b/scripts/context/lua/third/enigma/mtx-t-enigma.lua index 8c84385..fb7adbd 100644 --- a/scripts/context/lua/third/enigma/mtx-t-enigma.lua +++ b/scripts/context/lua/third/enigma/mtx-t-enigma.lua @@ -1,11 +1,11 @@ -- -------------------------------------------------------------------------------- --- FILE: mtx-transliterate.lua --- USAGE: mtxrun --script transliterate [--mode=mode] --s="string" --- DESCRIPTION: context script interface for the Transliterator module +-- FILE: mtx-t-enigma.lua +-- USAGE: mtxrun --script enigma --setup="s" --text="t" +-- DESCRIPTION: context script interface for the Enigma module -- REQUIREMENTS: latest ConTeXt MkIV -- AUTHOR: Philipp Gesang (Phg), --- CREATED: 2011-06-11T16:14:16+0200 +-- CREATED: 2012-02-25 10:45:39+0100 -------------------------------------------------------------------------------- -- diff --git a/tex/context/third/enigma/enigma.lua b/tex/context/third/enigma/enigma.lua index 371ddd9..521c4b7 100644 --- a/tex/context/third/enigma/enigma.lua +++ b/tex/context/third/enigma/enigma.lua @@ -138,8 +138,8 @@ By default the output to \type{stdout} will be zero. The verbosity level can be adjusted in order to alleviate debugging. 
\stopparagraph --ichd]]-- -local verbose_level = 42 ---local verbose_level = 0 +--local verbose_level = 42 +local verbose_level = 0 --[[ichd-- \startparagraph @@ -575,10 +575,10 @@ accomplished, there will be an optional (re-)uppercasing. \stopparagraph \startparagraph -Substitutions are applied onto the lowercased input. You might want -to avoid some of these, above all the rules for numbers, because they -translate single digits only. The solution is to write out numbers above -ten. +Substitutions \reference[listing:preproc]{}are applied onto the +lowercased input. You might want to avoid some of these, above all the +rules for numbers, because they translate single digits only. The +solution is to write out numbers above ten. \stopparagraph --ichd]]-- @@ -737,9 +737,6 @@ local variable, \identifier{pb_char}. machine.step = machine.step + 1 machine:rotate() local pb = machine.plugboard - --if valid_char_p[char] == nil then -- skip unwanted characters - -- return char - --end char = letter_to_value[char] local pb_char = pb[char] -- first plugboard substitution emit(2, pprint_step, machine.step, char, pb_char) @@ -1078,8 +1075,34 @@ consists of three elements: return result or handle_day_key(nil, name, dk) end - new = function (name, setup_string, pattern) - --local raw_settings = lpegmatch(p_init, setup_string) +--[[ichd-- +\startparagraph +The enigma encoding is restricted to an input -- and, naturally, output +-- alphabet of exactly twenty-six characters. Obviously, this would +severely limit the set of encryptable documents. For this reason the +plain text would be \emph{preprocessed} prior to encoding, removing +spaces and substituting a range of characters, e.\,g. punctuation, with +placeholders (“X”) from the encodable spectrum. See above +\at{page}[listing:preproc] for a comprehensive list of substitutions. 
+\stopparagraph + +\startparagraph +The above mentioned preprocessing, however, does not even nearly extend +to the whole Unicode range that modern day typesetting is expected to +handle. Thus, sooner or later an Enigma machine will encounter +non-preprocessable characters and it will have to decide what to do with +them. The Enigma module offers two ways to handle this kind of +situation: to \emph{drop} those characters, possibly distorting the +deciphered plain text, or to leave them in, leaving hints behind as to +the structure of the encrypted text. Neither of these is optimal, so it is +nevertheless advisable to not include non-Latin characters in the plain +text in the first place. The settings key \identifier{other_chars} (type +boolean) determines whether we will keep or drop offending characters. +\stopparagraph +--ichd]]-- + + new = function (name, args) + local setup_string, pattern = args.day_key, args.rotor_setting local raw_settings = handle_day_key(setup_string, name) local rotors, ring = get_rotors(raw_settings.rotors, raw_settings.ring) @@ -1096,6 +1119,7 @@ consists of three elements: rotors = rotors, ring = ring, state = init_state, + other_chars = args.other_chars, ---> a>1, b>2, c>3 reflector = letter_to_value[raw_settings.reflector], plugboard = plugboard, @@ -1225,7 +1249,7 @@ a sanitizer routine and, if so, apply it to its value. 
end local sanitizers = { - other_chars = toboolean, + other_chars = toboolean, -- true = keep, false = drop day_key = alphanum_or_space, rotor_setting = ensure_alpha, verbose = ensure_int, @@ -1294,20 +1318,24 @@ local new_callback = function (machine, name) if n.id == glyph_node then local chr = utf8char(n.char) local replacement = machine:encode(chr) - if replacement == false then - --noderemove(head, n) + --if replacement == false then + if not replacement then + if not machine.other_chars then + noderemove(head, n) + end elseif type(replacement) == "string" then local insertion = nodecopy(n) insertion.char = utf8byte(replacement) nodeinsert_before(head, n, insertion) + noderemove(head, n) elseif type(replacement) == "table" then for i=1, #replacement do local insertion = nodecopy(n) insertion.char = utf8byte(replacement[i]) nodeinsert_before(head, n, insertion) end + noderemove(head, n) end - noderemove(head, n) elseif n.id == glue_node then -- spaces are dropped noderemove(head, n) @@ -1334,6 +1362,18 @@ local new_callback = function (machine, name) end end +--[[ichd-- +\startparagraph +Enigma\reference[listing:retrieve]{} machines can be copied and derived +from one another at will, cf. the \texmacro{defineenigma} on +\at{page}[listing:define]. Two helper functions residing inside the +\identifier{thirddata.enigma} namespace take care of these actions: +\luafunction{save_raw_args} and \luafunction{retrieve_raw_args}. As soon +as a machine is defined, we store its parsed options inside the table +\identifier{configurations} for later reference. For further details on +the machine derivation mechanism see \at{page}[listing:inherit]. 
+\stopparagraph +--ichd]]-- local configurations = { } local save_raw_args = function (conf, name) local current = configurations[name] or { } @@ -1350,26 +1390,28 @@ enigma.save_raw_args = save_raw_args enigma.retrieve_raw_args = retrieve_raw_args -local new_machine = function (args, name) - verbose_level = args.verbose - local machine = new(name, args.day_key, args.rotor_setting) +--[[ichd-- +\startparagraph +The function \luafunction{new_machine} instantiates a table containing +the complete specification of a workable \emph{Enigma} machine and other +metadata. The result is intended to be handed over to the callback +creation mechanism (\luafunction{new_callback}). However, the arguments +table is usually stored away in the \identifier{thirddata.enigma} +namespace anyway (\luafunction{save_raw_args}), so that the +specification of any machine can be inherited by some new setup later +on. +\stopparagraph +--ichd]]-- +local new_machine = function (_, name) + local args = configurations[name] + verbose_level = args.verbose or verbose_level + local machine = new(name, args) return machine end enigma.new_machine = new_machine enigma.new_callback = new_callback ------------------------------------------------------------------------- - ---local teststring = [[B I IV III 16 26 08 AD CN ET FL GI JV KZ PU QY WX]] ---local teststring = [[B I II III 01 01 01 AD CN ET FL GI JV KZ PU QY WX]] ---local teststring = [[B I II III 01 01 01]] ---local teststring = [[B I II III 01 01 02]] ---local teststring = [[B I II III 02 02 02]] ---local teststring = [[B I IV III 16 26 08 AD CN ET FL GI JV KZ PU QY WX]] ---local teststring = [[B I IV III 16 26 08]] ---local teststring = [[B I IV III 01 01 02]] - --[[ichd-- \stopdocsection --ichd]]-- diff --git a/tex/context/third/enigma/t-enigma.mkvi b/tex/context/third/enigma/t-enigma.mkvi index 0d2d9bf..8bf0797 100644 --- a/tex/context/third/enigma/t-enigma.mkvi +++ b/tex/context/third/enigma/t-enigma.mkvi @@ -41,7 +41,7 @@ thirddata = 
thirddata or { } %D \startdocsection[title=Macro Generator] %D \startparagraph -%D The main setup. The \texmacro{defineenigma} macro does not adhere to +%D The main setup.\reference[listing:define]{} The \texmacro{defineenigma} macro does not adhere to %D the reommended practis of automatical macro derivation. Rather, we %D have our own parser do the job of setting globals. This is a %D consequence of the intention to offer the same behavior in any of the @@ -76,10 +76,18 @@ thirddata = thirddata or { } }% } +%D \startparagraph +%D The \texmacro{inherit_enigma}\reference[listing:inherit]{} is called as +%D an intermediate step when deriving one machine from an already existing +%D one. It gets the stored configuration of its ancestor, relying on the +%D \luafunction{retrieve_raw_args} function (see +%D \at{page}[listing:retrieve]). +%D \stopparagraph \def\inherit_enigma#to#from{% \ctxlua{% local enigma = thirddata.enigma local current_args = enigma.retrieve_raw_args(\!!bs#from\!!es) + enigma.save_raw_args(current_args, \!!bs#to\!!es) enigma.new_callback(enigma.new_machine(current_args, \!!bs#to\!!es), \!!bs#to\!!es) }% -- cgit v1.2.3