From cf6361d25d30139053d6a2e54e90e00210df7dd2 Mon Sep 17 00:00:00 2001
From: Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de>
Date: Sat, 25 Feb 2012 14:17:19 +0100
Subject: =?UTF-8?q?implemented=20and=20documented=20the=20=E2=80=9Cother?=
 =?UTF-8?q?=5Fchars=E2=80=9D=20switch?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tex/context/third/enigma/enigma.lua    | 100 +++++++++++++++++++++++----------
 tex/context/third/enigma/t-enigma.mkvi |  10 +++-
 2 files changed, 80 insertions(+), 30 deletions(-)

(limited to 'tex')

diff --git a/tex/context/third/enigma/enigma.lua b/tex/context/third/enigma/enigma.lua
index 371ddd9..521c4b7 100644
--- a/tex/context/third/enigma/enigma.lua
+++ b/tex/context/third/enigma/enigma.lua
@@ -138,8 +138,8 @@ By default the output to \type{stdout} will be zero. The verbosity level
 can be adjusted in order to alleviate debugging.
 \stopparagraph
 --ichd]]--
-local verbose_level = 42
---local verbose_level = 0
+--local verbose_level = 42
+local verbose_level = 0
 
 --[[ichd--
 \startparagraph
@@ -575,10 +575,10 @@ accomplished, there will be an optional (re-)uppercasing.
 \stopparagraph
 
 \startparagraph
-Substitutions are applied onto the lowercased input. You might want
-to avoid some of these, above all the rules for numbers, because they
-translate single digits only. The solution is to write out numbers above
-ten.
+Substitutions \reference[listing:preproc]{}are applied onto the
+lowercased input. You might want to avoid some of these, above all the
+rules for numbers, because they translate single digits only. The
+solution is to write out numbers above ten.
 \stopparagraph
 --ichd]]--
 
@@ -737,9 +737,6 @@ local variable, \identifier{pb_char}.
     machine.step = machine.step + 1
     machine:rotate()
     local pb = machine.plugboard
-    --if valid_char_p[char] == nil then -- skip unwanted characters
-    --  return char
-    --end
     char = letter_to_value[char]
     local pb_char = pb[char]              -- first plugboard substitution
     emit(2, pprint_step, machine.step, char, pb_char)
@@ -1078,8 +1075,34 @@ consists of three elements:
     return result or handle_day_key(nil, name, dk)
   end
 
-  new = function (name, setup_string, pattern)
-    --local raw_settings = lpegmatch(p_init, setup_string)
+--[[ichd--
+\startparagraph
+The enigma encoding is restricted to an input -- and, naturally, output
+-- alphabet of exactly twenty-seven characters. Obviously, this would
+severely limit the set of encryptable documents. For this reason the
+plain text would be \emph{preprocessed} prior to encoding, removing
+spaces and substituting a range of characters, e.\,g. punctuation, with
+placeholders (“X”) from the encodable spectrum. See above
+\at{page}[listing:preproc] for a comprehensive list of substitutions.
+\stopparagraph
+
+\startparagraph
+The above mentioned preprocessing, however, does not even nearly extend
+to the whole unicode range that modern day typesetting is expected to
+handle. Thus, sooner or later an Enigma machine will encounter
+non-preprocessable characters and it will have to decide what to do with
+them. The Enigma module offers two ways to handle this kind of
+situation: \emph{drop} those characters, possibly distorting the
+deciphered plain text, or to leave them in, leaving hints behind as to
+the structure of the encrypted text. None of these is optional, so it is
+nevertheless advisable to not include non-latin characters in the plain
+text in the first place. The settings key \identifier{other_chars} (type
+boolean) determines whether we will keep or drop offending characters.
+\stopparagraph
+--ichd]]--
+
+  new = function (name, args)
+    local setup_string, pattern = args.day_key, args.rotor_setting
     local raw_settings = handle_day_key(setup_string, name)
     local rotors, ring =
       get_rotors(raw_settings.rotors, raw_settings.ring)
@@ -1096,6 +1119,7 @@ consists of three elements:
       rotors              = rotors,
       ring                = ring,
       state               = init_state,
+      other_chars         = args.other_chars,
       ---> a>1, b>2, c>3
       reflector           = letter_to_value[raw_settings.reflector],
       plugboard           = plugboard,
@@ -1225,7 +1249,7 @@ a sanitizer routine and, if so, apply it to its value.
   end
 
   local sanitizers = {
-    other_chars   = toboolean,
+    other_chars   = toboolean,          -- true = keep, false = drop
     day_key       = alphanum_or_space,
     rotor_setting = ensure_alpha,
     verbose       = ensure_int,
@@ -1294,20 +1318,24 @@ local new_callback = function (machine, name)
       if n.id == glyph_node then
         local chr         = utf8char(n.char)
         local replacement = machine:encode(chr)
-        if replacement == false then
-          --noderemove(head, n)
+        --if replacement == false then
+        if not replacement then
+          if not machine.other_chars then
+            noderemove(head, n)
+          end
         elseif type(replacement) == "string" then
           local insertion = nodecopy(n)
           insertion.char = utf8byte(replacement)
           nodeinsert_before(head, n, insertion)
+          noderemove(head, n)
         elseif type(replacement) == "table" then
           for i=1, #replacement do
             local insertion = nodecopy(n)
             insertion.char = utf8byte(replacement[i])
             nodeinsert_before(head, n, insertion)
           end
+          noderemove(head, n)
         end
-        noderemove(head, n)
       elseif  n.id == glue_node  then
         -- spaces are dropped
         noderemove(head, n)
@@ -1334,6 +1362,18 @@ local new_callback = function (machine, name)
   end
 end
 
+--[[ichd--
+\startparagraph
+Enigma\reference[listing:retrieve]{} machines can be copied and derived
+from one another at will, cf.  the \texmacro{defineenigma} on
+\at{page}[listing:define]. Two helper functions residing inside the
+\identifier{thirddata.enigma} namespace take care of these actions:
+\luafunction{save_raw_args} and \luafunction{retrieve_raw_args}. As soon
+as a machine is defined, we store its parsed options inside the table
+\identifier{configurations} for later reference. For further details on
+the machine derivation mechanism see \at{page}[listing:inherit].
+\stopparagraph
+--ichd]]--
 local configurations = { }
 local save_raw_args = function (conf, name)
   local current = configurations[name] or { }
@@ -1350,26 +1390,28 @@ enigma.save_raw_args     = save_raw_args
 enigma.retrieve_raw_args = retrieve_raw_args
 
 
-local new_machine = function (args, name)
-  verbose_level = args.verbose
-  local machine = new(name, args.day_key, args.rotor_setting)
+--[[ichd--
+\startparagraph
+The function \luafunction{new_machine} instantiates a table containing
+the complete specification of a workable \emph{Enigma} machine and other
+metadata. The result is intended to be handed over to the callback
+creation mechanism (\luafunction{new_callback}). However, the arguments
+table is usally stored away in the \identifier{thirddata.enigma}
+namespace anyway (\luafunction{save_raw_args}), so that the
+specification of any machine can be inherited by some new setup later
+on.
+\stopparagraph
+--ichd]]--
+local new_machine = function (_, name)
+  local args = configurations[name]
+  verbose_level = args.verbose or verbose_level
+  local machine = new(name, args)
   return machine
 end
 
 enigma.new_machine  = new_machine
 enigma.new_callback = new_callback
 
-------------------------------------------------------------------------
-
---local teststring = [[B I IV III 16 26 08 AD CN ET FL GI JV KZ PU QY WX]]
---local teststring = [[B I II III 01 01 01 AD CN ET FL GI JV KZ PU QY WX]]
---local teststring = [[B I II III 01 01 01]]
---local teststring = [[B I II III 01 01 02]]
---local teststring = [[B I II III 02 02 02]]
---local teststring = [[B I IV III 16 26 08 AD CN ET FL GI JV KZ PU QY WX]]
---local teststring = [[B I IV III 16 26 08]]
---local teststring = [[B I IV III 01 01 02]]
-
 --[[ichd--
 \stopdocsection
 --ichd]]--
diff --git a/tex/context/third/enigma/t-enigma.mkvi b/tex/context/third/enigma/t-enigma.mkvi
index 0d2d9bf..8bf0797 100644
--- a/tex/context/third/enigma/t-enigma.mkvi
+++ b/tex/context/third/enigma/t-enigma.mkvi
@@ -41,7 +41,7 @@ thirddata = thirddata or { }
 
 %D \startdocsection[title=Macro Generator]
 %D \startparagraph
-%D The main setup. The \texmacro{defineenigma} macro does not adhere to
+%D The main setup.\reference[listing:define]{} The \texmacro{defineenigma} macro does not adhere to
 %D the reommended practis of automatical macro derivation. Rather, we
 %D have our own parser do the job of setting globals. This is a
 %D consequence of the intention to offer the same behavior in any of the
@@ -76,10 +76,18 @@ thirddata = thirddata or { }
   }%
 }
 
+%D \startparagraph
+%D The \texmacro{inherit_enigma}\reference[listing:inherit]{} is called as
+%D an intermediate step when deriving one machine from an already existing
+%D one. It gets the stored configuration of its ancestor, relying on the
+%D \luafunction{retrieve_raw_args} function (see
+%D \at{page}[listing:retrieve].
+%D \stopparagraph
 \def\inherit_enigma#to#from{%
   \ctxlua{%
     local enigma       = thirddata.enigma
     local current_args = enigma.retrieve_raw_args(\!!bs#from\!!es)
+    enigma.save_raw_args(current_args, \!!bs#to\!!es)
     enigma.new_callback(enigma.new_machine(current_args, \!!bs#to\!!es),
                         \!!bs#to\!!es)
   }%
-- 
cgit v1.2.3