From fe5dee151313b6abd8ffee2c5fc5593f326e663f Mon Sep 17 00:00:00 2001
From: Francois Fleuret <francois@fleuret.org>
Date: Wed, 9 Nov 2016 09:09:14 +0100
Subject: [PATCH] Many fixes, now generates a single image per frame.

---
 README.txt      |  15 +-
 canvas.cc       |   2 +
 canvas.h        |   2 +
 canvas_cairo.cc |  84 +++++-
 canvas_cairo.h  |   9 +-
 dyncnn.lua      | 731 ++++++++++++++++++++----------------------------
 flatland.cc     | 219 +++++++++------
 img.lua         | 204 ++++++++++++++
 run.sh          |  77 ++---
 universe.cc     |  31 --
 universe.h      |  12 -
 11 files changed, 779 insertions(+), 607 deletions(-)
 create mode 100755 img.lua
diff --git a/README.txt b/README.txt
index 85cf8ba..e143b3c 100644
--- a/README.txt
+++ b/README.txt
@@ -5,7 +5,7 @@ the dynamics of 2D shapes as described in
   F. Fleuret. Predicting the dynamics of 2d objects with a deep
   residual network. CoRR, abs/1610.04032, 2016.
 
-  https://arxiv.org/pdf/1610.04032v1
+  https://arxiv.org/abs/1610.04032
 
 This package is composed of a simple 2d physics simulator called
 'flatland' written in C++, to generate the data-set, and a deep
@@ -16,16 +16,17 @@ script.
 
 It will
 
-  (1) generate the data-set of 50k triplets of images,
+  (1) Generate the data-set of 40k triplets of images,
 
-  (2) train the deep network, and output validation results every 100
-      epochs. This take ~30h on a GTX 1080.
+  (2) Train the deep network, and output validation results every 100
+      epochs. This takes about 15h on a GTX 1080 with CUDA 8.0, cuDNN
+      5.1, and a recent version of Torch.
 
-  (3) generate two pictures of the internal activations.
+  (3) Generate two pictures of the internal activations.
 
-  (4) generate a graph with the loss curves if gnuplot is installed.
+  (4) Generate a graph with the loss curves if gnuplot is installed.
 
 --
 Francois Fleuret
-Oct 21, 2016
+Nov 6, 2016
 Martigny
diff --git a/canvas.cc b/canvas.cc
index 358bd7e..58d4019 100644
--- a/canvas.cc
+++ b/canvas.cc
@@ -23,3 +23,5 @@
  */
 
 #include "canvas.h"
+
+Canvas::~Canvas() {} // out-of-line definition of the virtual destructor declared in canvas.h
diff --git a/canvas.h b/canvas.h
index f54f6d0..55390ba 100644
--- a/canvas.h
+++ b/canvas.h
@@ -29,6 +29,8 @@
 
 class Canvas {
 public:
+  virtual ~Canvas();
+  virtual void clear() = 0;
   virtual void set_line_width(scalar_t w) = 0;
   virtual void set_drawing_color(scalar_t r, scalar_t g, scalar_t b) = 0;
   virtual void draw_polygon(int filled, int nb, scalar_t *x, scalar_t *y) = 0;
diff --git a/canvas_cairo.cc b/canvas_cairo.cc
index ec26f60..aeb7b9f 100644
--- a/canvas_cairo.cc
+++ b/canvas_cairo.cc
@@ -24,29 +24,86 @@
 
 #include "canvas_cairo.h"
 
+#include <cmath>
+
+#define MAX(x, y) (((x) >= (y)) ? (x) : (y)) // arguments are parenthesized; each may still be evaluated twice
+
+// Mosaic constructor: tiles the nb_rows x nb_cols canvases of ca (row-major) on a white canvas.
+CanvasCairo::CanvasCairo(scalar_t scale, int nb_rows, int nb_cols, CanvasCairo **ca) {
+  _scale = scale; // clear() divides by _scale, so it has to be set before any drawing
+  _actual_width = 0;
+  _actual_height = 0;
+
+  // A row is as high as its tallest canvas and as wide as the sum of their widths
+  for(int i = 0; i < nb_rows; i++) {
+    int row_height = 0, row_width = 0;
+    for(int j = 0; j < nb_cols; j++) {
+      CanvasCairo *this_ca = ca[i * nb_cols + j];
+      row_height = MAX(row_height, this_ca->_actual_height);
+      row_width += this_ca->_actual_width;
+    }
+    _actual_width = MAX(_actual_width, row_width);
+    _actual_height += row_height;
+  }
+
+  _data = new unsigned char [_actual_width * _actual_height * _depth];
+  _image = cairo_image_surface_create_for_data(_data,
+                                               CAIRO_FORMAT_RGB24,
+                                               _actual_width,
+                                               _actual_height,
+                                               _actual_width * _depth);
+  _context_resource = cairo_create(_image);
+  cairo_scale(_context_resource, scale, scale);
+
+  // Paint the background first: clearing after the copy would erase the mosaic
+  clear();
+  cairo_surface_flush(_image);
+
+  int y0 = 0;
+  for(int i = 0; i < nb_rows; i++) {
+    int x0 = 0, row_height = 0;
+    for(int j = 0; j < nb_cols; j++) {
+      CanvasCairo *this_ca = ca[i * nb_cols + j];
+      cairo_surface_flush(this_ca->_image);
+      for(int y = 0; y < this_ca->_actual_height; y++) {
+        for(int x = 0; x < this_ca->_actual_width * _depth; x++) {
+          _data[(x0 + _actual_width * (y0 + y)) * _depth + x] =
+            this_ca->_data[this_ca->_actual_width * y * _depth + x];
+        }
+      }
+      row_height = MAX(row_height, this_ca->_actual_height);
+      x0 += this_ca->_actual_width;
+    }
+    y0 += row_height;
+  }
+
+  // The pixels were written directly into _data, so tell cairo they changed
+  cairo_surface_mark_dirty(_image);
+}
+
 CanvasCairo::CanvasCairo(scalar_t scale, int width, int height) {
-  const int actual_width = int(width * scale);
-  const int actual_height = int(height * scale);
-  const int depth = 4;
+  _actual_width = int(width * scale); // surface size in pixels, int() truncates
+  _actual_height = int(height * scale);
+  _scale = scale; // remembered because clear() divides the pixel size by it
 
-  _data = new unsigned char [actual_width * actual_height * depth];
+  _data = new unsigned char [_actual_width * _actual_height * _depth]; // _depth bytes per pixel
 
   _image = cairo_image_surface_create_for_data(_data,
                                                CAIRO_FORMAT_RGB24,
-                                               actual_width,
-                                               actual_height,
-                                               actual_width * depth);
+                                               _actual_width,
+                                               _actual_height,
+                                               _actual_width * _depth); // row stride in bytes
 
   _context_resource = cairo_create(_image);
 
   cairo_scale(_context_resource, scale, scale);
 
-  cairo_set_source_rgb(_context_resource, 1.0, 1.0, 1.0);
   cairo_set_line_width (_context_resource, 1.0);
 
-  cairo_rectangle(_context_resource, 0, 0, width, height);
-
-  cairo_fill(_context_resource);
+  clear(); // white background; stands in for the inline fill kept commented out below
+  // cairo_set_source_rgb(_context_resource, 1.0, 1.0, 1.0);
+  // cairo_rectangle(_context_resource, 0, 0, width, height);
+  // cairo_fill(_context_resource);
 }
 
 CanvasCairo::~CanvasCairo() {
@@ -55,6 +112,11 @@ CanvasCairo::~CanvasCairo() {
   delete[] _data;
 }
 
+void CanvasCairo::clear() { // fill the whole surface with white
+  cairo_rectangle(_context_resource, 0.0, 0.0, _actual_width / _scale, _actual_height / _scale); // pixel size converted back to user units
+  cairo_set_source_rgb(_context_resource, 1.0, 1.0, 1.0); // white also stays the current source colour afterwards
+  cairo_fill(_context_resource);
+}
 
 void CanvasCairo::set_line_width(scalar_t w) {
   cairo_set_line_width (_context_resource, w);
diff --git a/canvas_cairo.h b/canvas_cairo.h
index 3b8f06b..3814a88 100644
--- a/canvas_cairo.h
+++ b/canvas_cairo.h
@@ -31,14 +31,21 @@
 #include <cairo.h>
 
 class CanvasCairo : public Canvas {
+  const static int _depth = 4;
+  int _actual_width, _actual_height;
+  scalar_t _scale;
   unsigned char *_data;
   cairo_surface_t *_image;
   cairo_t *_context_resource;
 
 public:
+  CanvasCairo(scalar_t scale, int nb_rows, int nb_cols, CanvasCairo **ca);
   CanvasCairo(scalar_t scale, int width, int height);
-  ~CanvasCairo();
+  CanvasCairo(int nb_rows, int nb_cols, CanvasCairo **x);
 
+  virtual ~CanvasCairo();
+
+  virtual void clear();
   virtual void set_line_width(scalar_t w);
   virtual void set_drawing_color(scalar_t r, scalar_t g, scalar_t b);
   virtual void draw_polygon(int filled, int nb, scalar_t *x, scalar_t *y);
diff --git a/dyncnn.lua b/dyncnn.lua
index e104386..5362593 100755
--- a/dyncnn.lua
+++ b/dyncnn.lua
@@ -30,346 +30,195 @@ require 'optim'
 require 'image'
 require 'pl'
 
-----------------------------------------------------------------------
-
-local opt = lapp[[
-   --seed                (default 1)               random seed
-
-   --learningStateFile   (default '')
-   --dataDir             (default './data/10p-mg/')
-   --resultDir           (default '/tmp/dyncnn')
-
-   --learningRate        (default -1)
-   --momentum            (default -1)
-   --nbEpochs            (default -1)              nb of epochs for the heavy setting
-
-   --heavy                                         use the heavy configuration
-   --nbChannels          (default -1)              nb of channels in the internal layers
-   --resultFreq          (default 100)
-
-   --noLog                                         supress logging
-
-   --exampleInternals    (default -1)
-]]
+require 'img'
 
 ----------------------------------------------------------------------
 
-commandLine=''
-for i = 0, #arg do
-   commandLine = commandLine ..  ' \'' .. arg[i] .. '\''
+function printf(f, ...) -- prints string.format(f, ...) followed by a newline
+   print(string.format(f, ...)) -- forward the varargs directly: unpack({...}) can drop values after a nil
 end
 
-----------------------------------------------------------------------
-
 colors = sys.COLORS
 
-global = {}
-
-function logString(s, c)
-   if global.logFile then
-      global.logFile:write(s)
-      global.logFile:flush()
-   end
-   local c = c or colors.black
-   io.write(c .. s)
-   io.flush()
+function printfc(c, f, ...) -- like printf, but emits the color escape c first and colors.black afterwards
+   print(c .. string.format(f, ...) .. colors.black) -- format exactly once: the formatted text must not be reused as a format string
 end
 
 function logCommand(c)
-   logString('[' .. c .. '] -> [' .. sys.execute(c) .. ']\n', colors.blue)
-end
-
-logString('commandline: ' .. commandLine .. '\n', colors.blue)
-
-logCommand('mkdir -v -p ' .. opt.resultDir)
-
-if not opt.noLog then
-   global.logName = opt.resultDir .. '/log'
-   global.logFile = io.open(global.logName, 'a')
+   print(colors.blue .. '[' .. c .. '] -> [' .. sys.execute(c) .. ']' .. colors.black)
 end
 
 ----------------------------------------------------------------------
+-- Environment and command line arguments
 
-alreadyLoggedString = {}
+local defaultNbThreads = 1
+local defaultUseGPU = false
 
-function logOnce(s)
-   local l = debug.getinfo(1).currentline
-   if not alreadyLoggedString[l] then
-      logString('@line ' .. l .. ' ' .. s, colors.red)
-      alreadyLoggedString[l] = s
-   end
+if os.getenv('TORCH_NB_THREADS') then
+   defaultNbThreads = tonumber(os.getenv('TORCH_NB_THREADS')) or defaultNbThreads -- os.getenv returns a string; keep the default numeric like the built-in 1
+   print('Environment variable TORCH_NB_THREADS is set and equal to ' .. defaultNbThreads)
+else
+   print('Environment variable TORCH_NB_THREADS is not set')
 end
 
-----------------------------------------------------------------------
-
-nbThreads = os.getenv('TORCH_NB_THREADS') or 1
-
-useGPU = os.getenv('TORCH_USE_GPU') == 'yes'
-
-for _, c in pairs({ 'date',
-                    'uname -a',
-                    'git log -1 --format=%H'
-                 })
-do
-   logCommand(c)
+if os.getenv('TORCH_USE_GPU') then
+   defaultUseGPU = os.getenv('TORCH_USE_GPU') == 'yes' -- only the exact value 'yes' turns the GPU default on
+   print('Environment variable TORCH_USE_GPU is set and evaluated as ' .. tostring(defaultUseGPU))
+else
+   print('Environment variable TORCH_USE_GPU is not set.')
 end
 
-logString('useGPU is \'' .. tostring(useGPU) .. '\'.\n')
-
-logString('nbThreads is \'' .. nbThreads .. '\'.\n')
-
 ----------------------------------------------------------------------
 
-torch.setnumthreads(nbThreads)
-torch.setdefaulttensortype('torch.FloatTensor')
-torch.manualSeed(opt.seed)
+local cmd = torch.CmdLine()
 
-mynn = {}
+cmd:text('')
+cmd:text('General setup')
 
--- To deal elegantly with CPU/GPU
-local mt = {}
-function mt.__index(table, key)
-   return (cudnn and cudnn[key]) or (cunn and cunn[key]) or nn[key]
-end
-setmetatable(mynn, mt)
+cmd:option('-seed', 1, 'initial random seed')
+cmd:option('-nbThreads', defaultNbThreads, 'how many threads (environment variable TORCH_NB_THREADS)')
+cmd:option('-useGPU', defaultUseGPU, 'should we use cuda (environment variable TORCH_USE_GPU)')
 
--- These are the tensors that can be kept on the CPU
-mynn.SlowTensor = torch.Tensor
--- These are the tensors that should be moved to the GPU
-mynn.FastTensor = torch.Tensor
+cmd:text('')
+cmd:text('Log')
 
-----------------------------------------------------------------------
+cmd:option('-resultFreq', 100, 'at which epoch frequency should we save result images')
+cmd:option('-exampleInternals', -1, 'should we save inner activation images')
+cmd:option('-noLog', false, 'should we prevent logging')
+cmd:option('-rundir', '', 'the directory for results')
 
-if useGPU then
-   require 'cutorch'
-   require 'cunn'
-   require 'cudnn'
+cmd:text('')
+cmd:text('Training')
 
-   mynn.FastTensor = torch.CudaTensor
+cmd:option('-nbEpochs', 1000, 'nb of epochs for the heavy setting')
+cmd:option('-learningRate', 0.1, 'learning rate')
+cmd:option('-batchSize', 128, 'size of the mini-batches')
+cmd:option('-filterSize', 5, 'convolution filter size')
+cmd:option('-nbTrainSamples', 32768)
+cmd:option('-nbValidationSamples', 1024)
+cmd:option('-nbTestSamples', 1024)
 
-   if cudnn then
-      cudnn.benchmark = true
-      cudnn.fastest = true
-   end
-end
+cmd:text('')
+cmd:text('Problem to solve')
 
-----------------------------------------------------------------------
+cmd:option('-dataDir', './data/10p-mg', 'data directory')
 
-config = {}
-config.learningRate = 0.1
-config.momentum = 0
-config.batchSize = 128
-config.filterSize = 5
+cmd:text('')
+cmd:text('Network structure')
 
-if opt.heavy then
+cmd:option('-nbChannels', 16)
+cmd:option('-nbBlocks', 8)
 
-   logString('Using the heavy configuration.\n')
-   config.nbChannels = 16
-   config.nbBlocks = 4
-   config.nbEpochs = 250
-   config.nbEpochsInit = 100
-   config.nbTrainSamples = 32768
-   config.nbValidationSamples = 1024
-   config.nbTestSamples = 1024
+------------------------------
+-- Log and stuff
 
-else
+cmd:addTime('DYNCNN','%F %T')
 
-   logString('Using the light configuration.\n')
-   config.nbChannels = 2
-   config.nbBlocks = 2
-   config.nbEpochs = 6
-   config.nbEpochsInit = 3
-   config.nbTrainSamples = 1024
-   config.nbValidationSamples = 1024
-   config.nbTestSamples = 1024
+params = cmd:parse(arg)
 
+if params.rundir == '' then
+   params.rundir = cmd:string('exp', params, { })
 end
 
-if opt.nbEpochs > 0 then
-   config.nbEpochs = opt.nbEpochs
-end
+paths.mkdir(params.rundir)
 
-if opt.nbChannels > 0 then
-   config.nbChannels = opt.nbChannels
+if not params.noLog then
+   -- Append to the log if there is one
+   cmd:log(io.open(params.rundir .. '/log', 'a'), params)
 end
 
-if opt.learningRate > 0 then
-   config.learningRate = opt.learningRate
-end
+----------------------------------------------------------------------
+-- The experiment per se
 
-if opt.momentum >= 0 then
-   config.momentum = opt.momentum
+if params.predictGrasp then -- NOTE(review): no -predictGrasp option is declared in the visible cmd:option calls, so this looks always false - confirm
+   params.targetDepth = 2
+else
+   params.targetDepth = 1
 end
 
 ----------------------------------------------------------------------
+-- Initializations
 
-function tensorCensus(tensorType, model)
+torch.setnumthreads(params.nbThreads)
+torch.setdefaulttensortype('torch.FloatTensor')
+torch.manualSeed(params.seed)
 
-   local nb = {}
+----------------------------------------------------------------------
+-- Dealing with the CPU/GPU
 
-   local function countThings(m)
-      for k, i in pairs(m) do
-         if torch.type(i) == tensorType then
-            nb[k] = (nb[k] or 0) + i:nElement()
-         end
-      end
-   end
+-- mynn will take entries in that order: mynn, cudnn, cunn, nn
 
-   model:apply(countThings)
+mynn = {}
 
-   return nb
+setmetatable(mynn,
+             {
+                __index = function(table, key)
+                   return (cudnn and cudnn[key]) or (cunn and cunn[key]) or nn[key]
+                end
+             }
+)
 
+-- These are the tensors that can be kept on the CPU
+mynn.SlowTensor = torch.Tensor
+
+-- These are the tensors that should be moved to the GPU
+mynn.FastTensor = torch.Tensor
+
+if params.useGPU then
+   require 'cutorch'
+   require 'cunn'
+   require 'cudnn'
+   cudnn.benchmark = true
+   cudnn.fastest = true
+   mynn.FastTensor = torch.CudaTensor
 end
 
 ----------------------------------------------------------------------
 
 function loadData(first, nb, name)
-   logString('Loading data `' .. name .. '\'.\n')
-
-   local persistentFileName = string.format('%s/persistent_%d_%d.dat',
-                                            opt.dataDir,
-                                            first,
-                                            nb)
-
-   -- This is at what framerate we work. It is greater than 1 so that
-   -- we can keep on disk sequences at a higher frame rate for videos
-   -- and explaining materials
-
-   local frameRate = 4
-
-   local data
-
-   if not path.exists(persistentFileName) then
-      logString(string.format('No persistent data structure, creating it (%d samples).\n', nb))
-      local data = {}
-      data.name = name
-      data.nbSamples = nb
-      data.width = 64
-      data.height = 64
-      data.input = mynn.SlowTensor(data.nbSamples, 2, data.height, data.width)
-      data.target = mynn.SlowTensor(data.nbSamples, 1, data.height, data.width)
-
-      for i = 1, data.nbSamples do
-         local n = i-1 + first-1
-         local prefix = string.format('%s/%03d/dyn_%06d',
-                                      opt.dataDir,
-                                      math.floor(n/1000), n)
-
-         function localLoad(filename, tensor)
-            local tmp
-            tmp = image.load(filename)
-            tmp:mul(-1.0):add(1.0)
-            tensor:copy(torch.max(tmp, 1))
-         end
+   print('Loading data `' .. name .. '\'.')
 
-         localLoad(prefix .. '_world_000.png', data.input[i][1])
-         localLoad(prefix .. '_grab.png',    data.input[i][2])
-         localLoad(string.format('%s_world_%03d.png', prefix, frameRate),
-                   data.target[i][1])
-      end
+   local data = {}
 
-      data.persistentFileName = persistentFileName
+   data.name = name
+   data.nbSamples = nb
+   data.width = 64
+   data.height = 64
 
-      torch.save(persistentFileName, data)
-   end
+   data.input = mynn.SlowTensor(data.nbSamples, 2, data.height, data.width)
+   data.target = mynn.SlowTensor(data.nbSamples, 1, data.height, data.width)
 
-   logCommand('sha256sum -b ' .. persistentFileName)
+   for i = 1, data.nbSamples do
+      local n = i-1 + first-1
+      local frame = image.load(string.format('%s/%03d/dyn_%06d.png',
+                                             params.dataDir,
+                                             math.floor(n/1000), n))
 
-   data = torch.load(persistentFileName)
+      frame:mul(-1.0):add(1.0)
+      frame = frame:max(1):select(1, 1)
 
-   return data
-end
+      data.input[i][1]:copy(frame:sub(0 * data.height + 1, 1 * data.height,
+                                      1 * data.width  + 1, 2 * data.width))
 
-----------------------------------------------------------------------
+      data.input[i][2]:copy(frame:sub(0 * data.height + 1, 1 * data.height,
+                                      0 * data.width  + 1, 1 * data.width))
 
--- This function gets as input a list of tensors of arbitrary
--- dimensions each, but whose two last dimension stands for height x
--- width. It creates an image tensor (2d, one channel) with each
--- argument tensor unfolded per row.
-
-function imageFromTensors(bt, signed)
-   local gap = 1
-   local tgap = -1
-   local width = 0
-   local height = gap
-
-   for _, t in pairs(bt) do
-      -- print(t:size())
-      local d = t:dim()
-      local h, w = t:size(d - 1), t:size(d)
-      local n = t:nElement() / (w * h)
-      width = math.max(width, gap + n * (gap + w))
-      height = height + gap + tgap + gap + h
+      data.target[i][1]:copy(frame:sub(1 * data.height + 1, 2 * data.height,
+                                       1 * data.width  + 1, 2 * data.width))
    end
 
-   local e = torch.Tensor(3, height, width):fill(1.0)
-   local y0 = 1 + gap
-
-   for _, t in pairs(bt) do
-      local d = t:dim()
-      local h, w = t:size(d - 1), t:size(d)
-      local n = t:nElement() / (w * h)
-      local z = t:norm() / math.sqrt(t:nElement())
-
-      local x0 = 1 + gap + math.floor( (width - n * (w + gap)) /2 )
-      local u = torch.Tensor(t:size()):copy(t):resize(n, h, w)
-      for m = 1, n do
-
-         for c = 1, 3 do
-            for y = 0, h+1 do
-               e[c][y0 + y - 1][x0     - 1] = 0.0
-               e[c][y0 + y - 1][x0 + w    ] = 0.0
-            end
-            for x = 0, w+1 do
-               e[c][y0     - 1][x0 + x - 1] = 0.0
-               e[c][y0 + h    ][x0 + x - 1] = 0.0
-            end
-         end
-
-         for y = 1, h do
-            for x = 1, w do
-               local v = u[m][y][x] / z
-               local r, g, b
-               if signed then
-                  if v < -1 then
-                     r, g, b = 0.0, 0.0, 1.0
-                  elseif v > 1 then
-                     r, g, b = 1.0, 0.0, 0.0
-                  elseif v >= 0 then
-                     r, g, b = 1.0, 1.0 - v, 1.0 - v
-                  else
-                     r, g, b = 1.0 + v, 1.0 + v, 1.0
-                  end
-               else
-                  if v <= 0 then
-                     r, g, b = 1.0, 1.0, 1.0
-                  elseif v > 1 then
-                     r, g, b = 0.0, 0.0, 0.0
-                  else
-                     r, g, b = 1.0 - v, 1.0 - v, 1.0 - v
-                  end
-               end
-               e[1][y0 + y - 1][x0 + x - 1] = r
-               e[2][y0 + y - 1][x0 + x - 1] = g
-               e[3][y0 + y - 1][x0 + x - 1] = b
-            end
-         end
-         x0 = x0 + w + gap
-      end
-      y0 = y0 + h + gap + tgap + gap
-   end
-
-   return e
+   return data
 end
 
+----------------------------------------------------------------------
+
 function collectAllOutputs(model, collection, which)
    if torch.type(model) == 'nn.Sequential' then
       for i = 1, #model.modules do
          collectAllOutputs(model.modules[i], collection, which)
       end
    elseif not which or which[torch.type(model)] then
-      local t = torch.type(model.output)
-      if t == 'torch.FloatTensor' or t == 'torch.CudaTensor' then
+      if torch.isTensor(model.output) then
          collection.nb = collection.nb + 1
          collection.outputs[collection.nb] = model.output
       end
@@ -388,9 +237,13 @@ function saveInternalsImage(model, data, n)
    collection.nb = 1
    collection.outputs[collection.nb] = input
 
-   local which = {}
-   which['nn.ReLU'] = true
-   collectAllOutputs(model, collection, which)
+   collectAllOutputs(model, collection,
+                     {
+                        ['nn.ReLU'] = true,
+                        ['cunn.ReLU'] = true,
+                        ['cudnn.ReLU'] = true,
+                     }
+   )
 
    if collection.outputs[collection.nb] ~= model.output then
       collection.nb = collection.nb + 1
@@ -398,25 +251,23 @@ function saveInternalsImage(model, data, n)
    end
 
    local fileName = string.format('%s/internals_%s_%06d.png',
-                                  opt.resultDir,
+                                  params.rundir,
                                   data.name, n)
 
-   logString('Saving ' .. fileName .. '\n')
+   print('Saving ' .. fileName)
    image.save(fileName, imageFromTensors(collection.outputs))
 end
 
 ----------------------------------------------------------------------
 
-function saveResultImage(model, data, prefix, nbMax, highlight)
-   local l2criterion = nn.MSECriterion()
+function saveResultImage(model, data, nbMax)
+   local criterion = nn.MSECriterion()
 
-   if useGPU then
-      logString('Moving the criterion to the GPU.\n')
-      l2criterion:cuda()
+   if params.useGPU then
+      print('Moving the criterion to the GPU.')
+      criterion:cuda()
    end
 
-   local prefix = prefix or 'result'
-   local result = torch.Tensor(data.height * 4 + 5, data.width + 2)
    local input = mynn.FastTensor(1, 2, data.height, data.width)
    local target = mynn.FastTensor(1, 1, data.height, data.width)
 
@@ -426,9 +277,9 @@ function saveResultImage(model, data, prefix, nbMax, highlight)
 
    model:evaluate()
 
-   logString(string.format('Write %d result images `%s\' for set `%s\' in %s.\n',
-                           nb, prefix, data.name,
-                           opt.resultDir))
+   printf('Write %d result images for `%s\'.', nb, data.name)
+
+   local lossFile = io.open(params.rundir .. '/result_' .. data.name .. '_losses.dat', 'w')
 
    for n = 1, nb do
 
@@ -437,86 +288,101 @@ function saveResultImage(model, data, prefix, nbMax, highlight)
       target:copy(data.target:narrow(1, n, 1))
 
       local output = model:forward(input)
+      local loss = criterion:forward(output, target)
+
+      output = mynn.SlowTensor(output:size()):copy(output)
+
+      -- We use our magical img.lua to create the result images
+
+      local comp = {
+         {
+            { pad = 1, data.input[n][1] },
+            { pad = 1, data.input[n][2] },
+            { pad = 1, data.target[n][1] },
+            { pad = 1, output[1][1] },
+         }
+      }
+
+      --[[
+      local comp = {
+         {
+            vertical = true,
+            { pad = 1, data.input[n][1] },
+            { pad = 1, data.input[n][2] }
+         },
+         torch.Tensor(4, 4):fill(1.0),
+         {
+            vertical = true,
+            { pad = 1, data.target[n][1] },
+            { pad = 1, output[1][1] },
+            { pad = 1, torch.csub(data.target[n][1], output[1][1]):abs() }
+         }
+      }
+      ]]--
+
+      local result = combineImages(1.0, comp)
+      result:mul(-1.0):add(1.0) -- maps every value v to 1 - v before saving
+
+      local fileName = string.format('result_%s_%06d.png', data.name, n)
+      image.save(params.rundir .. '/' .. fileName, result)
+      lossFile:write(string.format('%f %s\n', loss, fileName))
+   end
+   lossFile:close() -- the per-sample loss file was opened above and never closed
+end
 
-      local loss = l2criterion:forward(output, target)
-
-      result:fill(1.0)
-
-      if highlight then
-         for i = 1, data.height do
-            for j = 1, data.width do
-               local v = data.input[n][1][i][j]
-               result[1 + i + 0 * (data.height + 1)][1 + j] = data.input[n][2][i][j]
-               result[1 + i + 1 * (data.height + 1)][1 + j] = v
-               local a = data.target[n][1][i][j]
-               local b = output[1][1][i][j]
-               result[1 + i + 2 * (data.height + 1)][1 + j] =
-                  a * math.min(1, 0.1 + 2.0 * math.abs(a - v))
-               result[1 + i + 3 * (data.height + 1)][1 + j] =
-                  b * math.min(1, 0.1 + 2.0 * math.abs(b - v))
-            end
-         end
-      else
-         for i = 1, data.height do
-            for j = 1, data.width do
-               result[1 + i + 0 * (data.height + 1)][1 + j] = data.input[n][2][i][j]
-               result[1 + i + 1 * (data.height + 1)][1 + j] = data.input[n][1][i][j]
-               result[1 + i + 2 * (data.height + 1)][1 + j] = data.target[n][1][i][j]
-               result[1 + i + 3 * (data.height + 1)][1 + j] = output[1][1][i][j]
-            end
-         end
-      end
+----------------------------------------------------------------------
 
-      result:mul(-1.0):add(1.0)
+function createTower(filterSize, nbChannels, nbBlocks)
 
-      local fileName = string.format('%s/%s_%s_%06d.png',
-                                     opt.resultDir,
-                                     prefix,
-                                     data.name, n)
+   local tower
 
-      logString(string.format('LOSS_ON_SAMPLE %f %s\n', loss, fileName))
+   if nbBlocks == 0 then
 
-      image.save(fileName, result)
-   end
-end
+      tower = nn.Identity()
 
-----------------------------------------------------------------------
+   else
 
-function createTower(filterSize, nbChannels, nbBlocks)
-   local tower = mynn.Sequential()
+      tower = mynn.Sequential()
 
-   for b = 1, nbBlocks do
-      local block = mynn.Sequential()
+      for b = 1, nbBlocks do
+         local block = mynn.Sequential()
 
-      block:add(mynn.SpatialConvolution(nbChannels,
-                                        nbChannels,
-                                        filterSize, filterSize,
-                                        1, 1,
-                                        (filterSize - 1) / 2, (filterSize - 1) / 2))
-      block:add(mynn.SpatialBatchNormalization(nbChannels))
-      block:add(mynn.ReLU(true))
+         block:add(mynn.SpatialConvolution(nbChannels,
+                                           nbChannels,
+                                           filterSize, filterSize,
+                                           1, 1,
+                                           (filterSize - 1) / 2, (filterSize - 1) / 2))
+         block:add(mynn.SpatialBatchNormalization(nbChannels))
+         block:add(mynn.ReLU(true))
 
-      block:add(mynn.SpatialConvolution(nbChannels,
-                                        nbChannels,
-                                        filterSize, filterSize,
-                                        1, 1,
-                                        (filterSize - 1) / 2, (filterSize - 1) / 2))
+         block:add(mynn.SpatialConvolution(nbChannels,
+                                           nbChannels,
+                                           filterSize, filterSize,
+                                           1, 1,
+                                           (filterSize - 1) / 2, (filterSize - 1) / 2))
 
-      local parallel = mynn.ConcatTable()
-      parallel:add(block):add(mynn.Identity())
+         local parallel = mynn.ConcatTable()
+         parallel:add(block):add(mynn.Identity())
 
-      tower:add(parallel):add(mynn.CAddTable(true))
+         tower:add(parallel):add(mynn.CAddTable(true))
+
+         tower:add(mynn.SpatialBatchNormalization(nbChannels))
+         tower:add(mynn.ReLU(true))
+      end
 
-      tower:add(mynn.SpatialBatchNormalization(nbChannels))
-      tower:add(mynn.ReLU(true))
    end
 
    return tower
+
 end
 
-function createModel(filterSize, nbChannels, nbBlocks)
+function createModel(imageWidth, imageHeight,
+                     filterSize, nbChannels, nbBlocks)
+
    local model = mynn.Sequential()
 
+   -- Encode the two input channels (grasping image and starting
+   -- configuration) into the internal number of channels
    model:add(mynn.SpatialConvolution(2,
                                      nbChannels,
                                      filterSize, filterSize,
@@ -526,13 +392,10 @@ function createModel(filterSize, nbChannels, nbBlocks)
    model:add(mynn.SpatialBatchNormalization(nbChannels))
    model:add(mynn.ReLU(true))
 
-   local towerCode   = createTower(filterSize, nbChannels, nbBlocks)
-   local towerDecode = createTower(filterSize, nbChannels, nbBlocks)
+   -- Add the resnet modules
+   model:add(createTower(filterSize, nbChannels, nbBlocks))
 
-   model:add(towerCode)
-   model:add(towerDecode)
-
-   -- Decode to a single channel, which is the final image
+   -- Decode down to a single channel, which is the final image
    model:add(mynn.SpatialConvolution(nbChannels,
                                      1,
                                      filterSize, filterSize,
@@ -544,8 +407,22 @@ end
 
 ----------------------------------------------------------------------
 
-function fillBatch(data, first, nb, batch, permutation)
-   for k = 1, nb do
+function fillBatch(data, first, batch, permutation)
+   local actualBatchSize = math.min(params.batchSize, data.input:size(1) - first + 1)
+
+   if actualBatchSize ~= batch.input:size(1) then
+      local size = batch.input:size()
+      size[1] = actualBatchSize
+      batch.input:resize(size)
+   end
+
+   if actualBatchSize ~= batch.target:size(1) then
+      local size = batch.target:size()
+      size[1] = actualBatchSize
+      batch.target:resize(size)
+   end
+
+   for k = 1, batch.input:size(1) do
       local i
       if permutation then
          i = permutation[first + k - 1]
@@ -557,17 +434,10 @@ function fillBatch(data, first, nb, batch, permutation)
    end
 end
 
-function trainModel(model,
-                    trainData, validationData, nbEpochs, learningRate,
-                    learningStateFile)
+function trainModel(model, trainData, validationData)
 
-   local l2criterion = nn.MSECriterion()
-   local batchSize = config.batchSize
-
-   if useGPU then
-      logString('Moving the criterion to the GPU.\n')
-      l2criterion:cuda()
-   end
+   local criterion = nn.MSECriterion()
+   local batchSize = params.batchSize
 
    local batch = {}
    batch.input = mynn.FastTensor(batchSize, 2, trainData.height, trainData.width)
@@ -583,21 +453,29 @@ function trainModel(model,
       torch.setRNGState(model.RNGState)
    end
 
-   logString('Starting training.\n')
+   if params.useGPU then
+      print('Moving the model and criterion to the GPU.')
+      model:cuda()
+      criterion:cuda()
+   end
+
+   print('Starting training.')
 
    local parameters, gradParameters = model:getParameters()
-   logString(string.format('model has %d parameters.\n', parameters:storage():size(1)))
+   printf('The model has %d parameters.', parameters:storage():size(1))
 
    local averageTrainLoss, averageValidationLoss
    local trainTime, validationTime
 
+   ----------------------------------------------------------------------
+
    local sgdState = {
-      learningRate = config.learningRate,
-      momentum = config.momentum,
+      learningRate = params.learningRate,
+      momentum = 0,
       learningRateDecay = 0
    }
 
-   for e = startingEpoch, nbEpochs do
+   for e = startingEpoch, params.nbEpochs do
 
       model:training()
 
@@ -609,18 +487,19 @@ function trainModel(model,
 
       for b = 1, trainData.nbSamples, batchSize do
 
-         fillBatch(trainData, b, batchSize, batch, permutation)
+         fillBatch(trainData, b, batch, permutation)
 
          local opfunc = function(x)
-            -- Surprisingly copy() needs this check
+            -- Surprisingly, copy() needs this check
             if x ~= parameters then
                parameters:copy(x)
             end
 
             local output = model:forward(batch.input)
-            local loss = l2criterion:forward(output, batch.target)
 
-            local dLossdOutput = l2criterion:backward(output, batch.target)
+            local loss = criterion:forward(output, batch.target)
+            local dLossdOutput = criterion:backward(output, batch.target)
+
             gradParameters:zero()
             model:backward(batch.input, dLossdOutput)
 
@@ -639,6 +518,7 @@ function trainModel(model,
 
       ----------------------------------------------------------------------
       -- Validation losses
+
       do
          model:evaluate()
 
@@ -647,9 +527,9 @@ function trainModel(model,
          local startTime = sys.clock()
 
          for b = 1, validationData.nbSamples, batchSize do
-            fillBatch(validationData, b, batchSize, batch)
+            fillBatch(validationData, b, batch)
             local output = model:forward(batch.input)
-            accLoss = accLoss + l2criterion:forward(output, batch.target)
+            accLoss = accLoss + criterion:forward(output, batch.target)
             nbBatches = nbBatches + 1
          end
 
@@ -657,31 +537,27 @@ function trainModel(model,
          averageValidationLoss = accLoss / nbBatches;
       end
 
-      logString(string.format('Epoch train %0.2fs (%0.2fms / sample), validation %0.2fs (%0.2fms / sample).\n',
-                              trainTime,
-                              1000 * trainTime / trainData.nbSamples,
-                              validationTime,
-                              1000 * validationTime / validationData.nbSamples))
+      printf('Epoch train %0.2fs (%0.2fms / sample), validation %0.2fs (%0.2fms / sample).',
+             trainTime,
+             1000 * trainTime / trainData.nbSamples,
+             validationTime,
+             1000 * validationTime / validationData.nbSamples)
 
-      logString(string.format('LOSS %d %f %f\n', e, averageTrainLoss, averageValidationLoss),
-                colors.green)
+      printfc(colors.green, 'LOSS %d %f %f', e, averageTrainLoss, averageValidationLoss)
 
       ----------------------------------------------------------------------
       -- Save a persistent state so that we can restart from there
 
-      if learningStateFile then
-         model.RNGState = torch.getRNGState()
-         model.epoch = e
-         model:clearState()
-         logString('Writing ' .. learningStateFile .. '.\n')
-         torch.save(learningStateFile, model)
-      end
+      model:clearState()
+      model.RNGState = torch.getRNGState()
+      model.epoch = e
+      torch.save(params.rundir .. '/model_last.t7', model)
 
       ----------------------------------------------------------------------
       -- Save a duplicate of the persistent state from time to time
 
-      if opt.resultFreq > 0 and e%opt.resultFreq == 0 then
-         torch.save(string.format('%s/epoch_%05d_model', opt.resultDir, e), model)
+      if params.resultFreq > 0 and e%params.resultFreq == 0 then
+         torch.save(string.format('%s/model_%04d.t7', params.rundir, e), model)
          saveResultImage(model, trainData)
          saveResultImage(model, validationData)
       end
@@ -692,64 +568,65 @@ end
 
 function createAndTrainModel(trainData, validationData)
 
-   local model
+   -- Load the current training state, or create a new model from
+   -- scratch
 
-   local learningStateFile = opt.learningStateFile
-
-   if learningStateFile == '' then
-      learningStateFile = opt.resultDir .. '/learning.state'
-   end
+   if pcall(function () model = torch.load(params.rundir .. '/model_last.t7') end) then
 
-   local gotlearningStateFile
+      printfc(colors.red,
+              'Found a learning state with %d epochs finished, starting from there.',
+              model.epoch)
 
-   logString('Using the learning state file ' .. learningStateFile .. '\n')
-
-   if pcall(function () model = torch.load(learningStateFile) end) then
-
-      gotlearningStateFile = true
-
-   else
-
-      model = createModel(config.filterSize, config.nbChannels, config.nbBlocks)
-
-      if useGPU then
-         logString('Moving the model to the GPU.\n')
-         model:cuda()
+      if params.exampleInternals > 0 then
+         saveInternalsImage(model, validationData, params.exampleInternals)
+         os.exit(0)
       end
 
-   end
+   else
 
-   logString(tostring(model) .. '\n')
+      model = createModel(trainData.width, trainData.height,
+                          params.filterSize, params.nbChannels,
+                          params.nbBlocks)
 
-   if gotlearningStateFile then
-      logString(string.format('Found a learning state with %d epochs finished.\n', model.epoch),
-                colors.red)
    end
 
-   if opt.exampleInternals > 0 then
-      saveInternalsImage(model, validationData, opt.exampleInternals)
-      os.exit(0)
-   end
-
-   trainModel(model,
-              trainData, validationData,
-              config.nbEpochs, config.learningRate,
-              learningStateFile)
+   trainModel(model, trainData, validationData)
 
    return model
 
 end
 
-for i, j in pairs(config) do
-   logString('config ' .. i .. ' = \'' .. j ..'\'\n')
+----------------------------------------------------------------------
+-- main
+
+for _, c in pairs({
+      'date',
+      'uname -a',
+      'git log -1 --format=%H'
+                 })
+do
+   logCommand(c)
 end
 
-local trainData = loadData(1, config.nbTrainSamples, 'train')
-local validationData = loadData(config.nbTrainSamples + 1, config.nbValidationSamples, 'validation')
-local testData = loadData(config.nbTrainSamples + config.nbValidationSamples + 1, config.nbTestSamples, 'test')
+local trainData = loadData(1,
+                           params.nbTrainSamples, 'train')
+
+local validationData = loadData(params.nbTrainSamples + 1,
+                                params.nbValidationSamples, 'validation')
 
 local model = createAndTrainModel(trainData, validationData)
 
+----------------------------------------------------------------------
+-- Test
+
+local testData = loadData(params.nbTrainSamples + params.nbValidationSamples + 1,
+                          params.nbTestSamples, 'test')
+
+if params.useGPU then
+   print('Moving the model to the GPU.') -- only the model is moved here; no criterion is in scope
+   model:cuda()
+end
+
 saveResultImage(model, trainData)
 saveResultImage(model, validationData)
-saveResultImage(model, testData, nil, testData.nbSamples)
+saveResultImage(model, testData, 1024)
diff --git a/flatland.cc b/flatland.cc
index c27bd97..3a59e88 100644
--- a/flatland.cc
+++ b/flatland.cc
@@ -38,13 +38,6 @@ using namespace std;
 #include "universe.h"
 #include "canvas_cairo.h"
 
-void generate_png(Universe *universe, scalar_t scale, FILE *file) {
-  CanvasCairo canvas(scale, universe->width(), universe->height());
-  canvas.set_line_width(1.0 / scale);
-  universe->draw(&canvas);
-  canvas.write_png(file);
-}
-
 FILE *safe_fopen(const char *name, const char *mode) {
   FILE *file = fopen(name, mode);
   if(!file) {
@@ -59,34 +52,59 @@ void print_help(const char *command) {
   exit(1);
 }
 
+//////////////////////////////////////////////////////////////////////
+
+void draw_universe_on_canvas(CanvasCairo *canvas, scalar_t scaling,
+                             Universe *universe) {
+  canvas->set_line_width(1.0 / scaling);
+  universe->draw(canvas);
+}
+
+void draw_grabbing_point_on_canvas(CanvasCairo *canvas, scalar_t scaling,
+                                   scalar_t xg, scalar_t yg,
+                                   scalar_t r, scalar_t g, scalar_t b) {
+  scalar_t radius = 1/scaling;
+  int n = 36;
+  scalar_t xp[n], yp[n];
+  for(int k = 0; k < n; k++) {
+    scalar_t alpha = 2 * M_PI * scalar_t(k) / scalar_t(n);
+    xp[k] = xg + radius * cos(alpha);
+    yp[k] = yg + radius * sin(alpha);
+  }
+  canvas->set_drawing_color(r, g, b);
+  canvas->set_line_width(2.0);
+  canvas->draw_polygon(1, n, xp, yp);
+}
+
+//////////////////////////////////////////////////////////////////////
+
 int main(int argc, char **argv) {
   const scalar_t world_width = 400;
   const scalar_t world_height = 400;
-  const scalar_t block_size = 80;
+  const scalar_t scaling = 0.16; // So that 400 * 0.16 = 64
+  const scalar_t shape_size = 80;
 
   const scalar_t dt = 0.1;
-  const int nb_iterations_per_steps = 20;
+  const int nb_iterations_per_steps = 5;
 
   //////////////////////////////////////////////////////////////////////
 
-  // We will generate images { 0, every_nth, 2 * every_nth, ..., nb_frames - 1 }
+  // We will generate images { 0, every_nth, 2 * every_nth, ..., k * every_nth < nb_frames }
 
   // The framerate every_nth may be set to smaller value to generate
   // nice materials for presentations or papers.
 
   int every_nth = 4;
-
   int nb_frames = 5;
-
   int multi_grasp = 0;
   int nb_shapes = 1;
   char data_dir[1024] = "/tmp/";
+  int multi_images = 0;
+  int show_grabbing_point = 0;
+  int skip = -1;
 
   //////////////////////////////////////////////////////////////////////
 
-  Universe *universe;
-  Polygon *grabbed_polygon;
-
   if(argc < 2) {
     print_help(argv[0]);
   }
@@ -135,6 +153,23 @@ int main(int argc, char **argv) {
       i++;
     }
 
+    else if(strcmp(argv[i], "--multi_images") == 0) {
+      multi_images = 1;
+      i++;
+    }
+
+    else if(strcmp(argv[i], "--show_grabbing_point") == 0) {
+      show_grabbing_point = 1;
+      i++;
+    }
+
+    else if(strcmp(argv[i], "--skip") == 0) {
+      i++;
+      if(i == argc) { print_help(argv[0]);}
+      skip = atoi(argv[i]);
+      i++;
+    }
+
     else {
       cerr << "Unknown option " << argv[i] << "." << endl;
       abort();
@@ -151,16 +186,29 @@ int main(int argc, char **argv) {
     abort();
   }
 
-  universe = new Universe(nb_shapes, world_width, world_height);
-
   for(int n = 0; n < nb_sequences; n++) {
 
-    scalar_t grab_start_x = world_width * 0.5;
-    scalar_t grab_start_y = world_height * 0.75;
+    Universe *universe;
+    Polygon *grabbed_polygon;
+
+    universe = new Universe(nb_shapes, world_width, world_height);
+
+    const int nb_saved_frames = (nb_frames + every_nth - 1) / every_nth;
+
+    CanvasCairo *canvases[nb_saved_frames * 2];
+
+    for(int s = 0; s < 2 * nb_saved_frames; s++) {
+      canvases[s] = new CanvasCairo(scaling, universe->width(), universe->height());
+    }
+
+    scalar_t grab_start_x, grab_start_y;
 
     if(multi_grasp) {
       grab_start_x = world_width * (0.1 + 0.8 * drand48());
       grab_start_y = world_height * (0.1 + 0.8 * drand48());
+    } else {
+      grab_start_x = world_width * 0.5;
+      grab_start_y = world_height * 0.75;
     }
 
     if((n+1)%100 == 0) {
@@ -182,40 +230,30 @@ int main(int argc, char **argv) {
         nb_attempts = 0;
 
         do {
-          scalar_t x[] = {
-            - block_size * 0.4,
-            + block_size * 0.4,
-            + block_size * 0.4,
-            - block_size * 0.4,
-          };
-
-          scalar_t y[] = {
-            - block_size * 0.6,
-            - block_size * 0.6,
-            + block_size * 0.6,
-            + block_size * 0.6,
-          };
-
-          scalar_t delta = block_size / sqrt(2.0);
+          scalar_t x[] = { - shape_size * 0.4, + shape_size * 0.4,
+                           + shape_size * 0.4, - shape_size * 0.4 };
+
+          scalar_t y[] = { - shape_size * 0.6, - shape_size * 0.6,
+                           + shape_size * 0.6, + shape_size * 0.6 };
+
+          scalar_t delta = shape_size / sqrt(2.0);
+
           scalar_t object_center_x = delta + (world_width - 2 * delta) * drand48();
           scalar_t object_center_y = delta + (world_height - 2 * delta) * drand48();
-          scalar_t red, green, blue;
-          red = 1.00;
-          green = red;
-          blue = red;
+
           delete pol;
-          pol = new Polygon(0.5,
-                            red, green, blue,
-                            x, y, sizeof(x)/sizeof(scalar_t));
+          pol = new Polygon(0.5, 1.0, 1.0, 1.0, x, y, sizeof(x)/sizeof(scalar_t));
           pol->set_position(object_center_x, object_center_y, M_PI * 2 * drand48());
           pol->set_speed(0, 0, 0);
+
           universe->initialize_polygon(pol);
+
           nb_attempts++;
         } while(nb_attempts < nb_attempts_max && universe->collide(pol));
 
         if(nb_attempts == nb_attempts_max) {
           delete pol;
-          u = 0;
+          u = -1;
           universe->clear();
           nb_attempts = 0;
         } else {
@@ -226,58 +264,73 @@ int main(int argc, char **argv) {
       grabbed_polygon = universe->pick_polygon(grab_start_x, grab_start_y);
     } while(!grabbed_polygon);
 
-    const scalar_t scaling = 0.16;
-
-    CanvasCairo grab_trace(scaling, world_width, world_height);
-
-    {
+    if(n%1000 == 0) {
       char buffer[1024];
-      sprintf(buffer, "%s/%03d/", data_dir, n/1000);
+      sprintf(buffer, "%s/%03d/", data_dir, n / 1000);
       mkdir(buffer, 0777);
     }
 
-    scalar_t grab_relative_x = grabbed_polygon->relative_x(grab_start_x, grab_start_y);
-    scalar_t grab_relative_y = grabbed_polygon->relative_y(grab_start_x, grab_start_y);
-
-    {
-      int n = 36;
-      scalar_t xp[n], yp[n];
-      for(int k = 0; k < n; k++) {
-        scalar_t radius = 1/scaling;
-        scalar_t alpha = 2 * M_PI * scalar_t(k) / scalar_t(n);
-        xp[k] = grab_start_x + radius * cos(alpha);
-        yp[k] = grab_start_y + radius * sin(alpha);
+    if(skip < 0 || n >= skip) {
+
+      scalar_t grab_relative_x = grabbed_polygon->relative_x(grab_start_x, grab_start_y);
+      scalar_t grab_relative_y = grabbed_polygon->relative_y(grab_start_x, grab_start_y);
+
+      for(int s = 0; s < nb_frames; s++) {
+        if(s % every_nth == 0) {
+          int t = s / every_nth;
+          scalar_t xf = grabbed_polygon->absolute_x(grab_relative_x, grab_relative_y);
+          scalar_t yf = grabbed_polygon->absolute_y(grab_relative_x, grab_relative_y);
+
+          canvases[2 * t + 0]->clear();
+          draw_grabbing_point_on_canvas(canvases[2 * t + 0], scaling,
+                                        xf, yf, 0.0, 0.0, 0.0);
+          canvases[2 * t + 1]->clear();
+          draw_universe_on_canvas(canvases[2 * t + 1], scaling, universe);
+
+          if(show_grabbing_point) {
+            draw_grabbing_point_on_canvas(canvases[2 * t + 1], scaling,
+                                          xf, yf, 1.0, 0.0, 0.0);
+          }
+        }
+
+        if(s < nb_frames - 1) {
+          // Run the simulation
+          for(int i = 0; i < nb_iterations_per_steps; i++) {
+            scalar_t xf = grabbed_polygon->absolute_x(grab_relative_x, grab_relative_y);
+            scalar_t yf = grabbed_polygon->absolute_y(grab_relative_x, grab_relative_y);
+            grabbed_polygon->apply_force(dt, xf, yf, 0.0, -1.0);
+            universe->update(dt);
+          }
+        }
       }
-      grab_trace.set_drawing_color(0.0, 0.0, 0.0);
-      grab_trace.set_line_width(2.0);
-      grab_trace.draw_polygon(1, n, xp, yp);
-    }
 
-    for(int s = 0; s < nb_frames; s++) {
-      if(s % every_nth == 0) {
-        char buffer[1024];
-        sprintf(buffer, "%s/%03d/dyn_%06d_world_%03d.png", data_dir, n/1000, n, s);
+      char buffer[1024];
+
+      if(multi_images) {
+        for(int j = 0; j < nb_saved_frames; j++) {
+          FILE *file;
+          sprintf(buffer, "%s/%03d/dyn_%06d_grab_%02d.png", data_dir, n / 1000, n, j);
+          file = safe_fopen(buffer, "w");
+          canvases[j * 2 + 0]->write_png(file);
+          fclose(file);
+          sprintf(buffer, "%s/%03d/dyn_%06d_state_%02d.png", data_dir, n / 1000, n, j);
+          file = safe_fopen(buffer, "w");
+          canvases[j * 2 + 1]->write_png(file);
+          fclose(file);
+        }
+      } else {
+        CanvasCairo main_canvas(scaling, nb_saved_frames, 2, canvases);
+        sprintf(buffer, "%s/%03d/dyn_%06d.png", data_dir, n / 1000, n);
         FILE *file = safe_fopen(buffer, "w");
-        generate_png(universe, scaling, file);
+        main_canvas.write_png(file);
         fclose(file);
       }
-
-      for(int i = 0; i < nb_iterations_per_steps; i++) {
-        scalar_t xf = grabbed_polygon->absolute_x(grab_relative_x, grab_relative_y);
-        scalar_t yf = grabbed_polygon->absolute_y(grab_relative_x, grab_relative_y);
-        grabbed_polygon->apply_force(dt, xf, yf, 0.0, -1.0);
-        universe->update(dt);
-      }
     }
 
-    {
-      char buffer[1024];
-      sprintf(buffer, "%s/%03d/dyn_%06d_grab.png", data_dir, n/1000, n);
-      FILE *file = safe_fopen(buffer, "w");
-      grab_trace.write_png(file);
-      fclose(file);
+    for(int t = 0; t < 2 * nb_saved_frames; t++) {
+      delete canvases[t];
     }
-  }
 
-  delete universe;
+    delete universe;
+  }
 }
diff --git a/img.lua b/img.lua
new file mode 100755
index 0000000..afed4e0
--- /dev/null
+++ b/img.lua
@@ -0,0 +1,204 @@
+
+--[[
+
+   dyncnn is a deep-learning algorithm for the prediction of
+   interacting object dynamics
+
+   Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/
+   Written by Francois Fleuret <francois.fleuret@idiap.ch>
+
+   This file is part of dyncnn.
+
+   dyncnn is free software: you can redistribute it and/or modify it
+   under the terms of the GNU General Public License version 3 as
+   published by the Free Software Foundation.
+
+   dyncnn is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with dyncnn.  If not, see <http://www.gnu.org/licenses/>.
+
+]]--
+
+require 'torch'
+
+--[[
+
+The combineImages function takes as input a parameter c which is the
+value to use for the background of the resulting image (padding and
+such), and t which is either a 2d tensor, a 3d tensor, or a table.
+
+ * If t is a 3d tensor, it is returned unchanged.
+
+ * If t is a 2d tensor [h x w], it is copied into a new [1 x h x w]
+   tensor, which is returned.
+
+ * If t is a table, combineImages first calls itself recursively on
+   t[1], t[2], etc.
+
+   It then creates a new tensor by concatenating the results
+   horizontally if t.vertical is nil, vertically otherwise.
+
+   It adds a padding of t.pad pixels if this field is set.
+
+ * Example
+
+   x = torch.Tensor(64, 64):fill(0.5)
+   y = torch.Tensor(100, 30):fill(0.85)
+
+   i = combineImages(1.0,
+      {
+         pad = 1,
+         vertical = true,
+         { pad = 1, x },
+         {
+            y,
+            { pad = 4, torch.Tensor(32, 16):fill(0.25) },
+            { pad = 1, torch.Tensor(45, 54):uniform(0.25, 0.9) },
+         }
+      }
+   )
+
+   image.save('example.png', i)
+
+]]--
+
+function combineImages(c, t)
+
+   if torch.isTensor(t) then
+
+      if t:dim() == 3 then
+         return t
+      elseif t:dim() == 2 then
+         return torch.Tensor(1, t:size(1), t:size(2)):copy(t)
+      else
+         error('can only deal with [height x width] or [channel x height x width] tensors.')
+      end
+
+   else
+
+      local subImages = {} -- The subimages
+      local nc = 0 -- Nb of columns
+      local nr = 0 -- Nb of rows
+
+      for i, x in ipairs(t) do
+         subImages[i] = combineImages(c, x)
+         if t.vertical then
+            nr = nr + subImages[i]:size(2)
+            nc = math.max(nc, subImages[i]:size(3))
+         else
+            nr = math.max(nr, subImages[i]:size(2))
+            nc = nc + subImages[i]:size(3)
+         end
+      end
+
+      local pad = t.pad or 0
+      local result = torch.Tensor(subImages[1]:size(1), nr + 2 * pad, nc + 2 * pad):fill(c)
+      local co = 1 + pad -- Origin column
+      local ro = 1 + pad -- Origin row
+
+      for i in ipairs(t) do
+
+         result:sub(1, subImages[1]:size(1),
+                    ro, ro + subImages[i]:size(2) - 1,
+                    co, co + subImages[i]:size(3) - 1):copy(subImages[i])
+
+         if t.vertical then
+            ro = ro + subImages[i]:size(2)
+         else
+            co = co + subImages[i]:size(3)
+         end
+
+      end
+
+      return result
+
+   end
+
+end
+
+--[[
+
+The imageFromTensors function gets as input a list of tensors of
+arbitrary dimensions each, but whose two last dimensions stand for
+height x width. It creates an RGB image tensor [3 x height x width]
+with each argument tensor unfolded per row.
+
+]]--
+
+function imageFromTensors(bt, signed)
+   local gap = 1
+   local tgap = -1
+   local width = 0
+   local height = gap
+
+   for _, t in pairs(bt) do
+      local d = t:dim()
+      local h, w = t:size(d - 1), t:size(d)
+      local n = t:nElement() / (w * h)
+      width = math.max(width, gap + n * (gap + w))
+      height = height + gap + tgap + gap + h
+   end
+
+   local e = torch.Tensor(3, height, width):fill(1.0)
+   local y0 = 1 + gap
+
+   for _, t in pairs(bt) do
+      local d = t:dim()
+      local h, w = t:size(d - 1), t:size(d)
+      local n = t:nElement() / (w * h)
+      local z = t:norm() / math.sqrt(t:nElement())
+
+      local x0 = 1 + gap + math.floor( (width - n * (w + gap)) /2 )
+      local u = torch.Tensor(t:size()):copy(t):resize(n, h, w)
+      for m = 1, n do
+
+         for c = 1, 3 do
+            for y = 0, h+1 do
+               e[c][y0 + y - 1][x0     - 1] = 0.0
+               e[c][y0 + y - 1][x0 + w    ] = 0.0
+            end
+            for x = 0, w+1 do
+               e[c][y0     - 1][x0 + x - 1] = 0.0
+               e[c][y0 + h    ][x0 + x - 1] = 0.0
+            end
+         end
+
+         for y = 1, h do
+            for x = 1, w do
+               local v = u[m][y][x] / z
+               local r, g, b
+               if signed then
+                  if v < -1 then
+                     r, g, b = 0.0, 0.0, 1.0
+                  elseif v > 1 then
+                     r, g, b = 1.0, 0.0, 0.0
+                  elseif v >= 0 then
+                     r, g, b = 1.0, 1.0 - v, 1.0 - v
+                  else
+                     r, g, b = 1.0 + v, 1.0 + v, 1.0
+                  end
+               else
+                  if v <= 0 then
+                     r, g, b = 1.0, 1.0, 1.0
+                  elseif v > 1 then
+                     r, g, b = 0.0, 0.0, 0.0
+                  else
+                     r, g, b = 1.0 - v, 1.0 - v, 1.0 - v
+                  end
+               end
+               e[1][y0 + y - 1][x0 + x - 1] = r
+               e[2][y0 + y - 1][x0 + x - 1] = g
+               e[3][y0 + y - 1][x0 + x - 1] = b
+            end
+         end
+         x0 = x0 + w + gap
+      end
+      y0 = y0 + h + gap + tgap + gap
+   end
+
+   return e
+end
diff --git a/run.sh b/run.sh
index 890c2e0..fe041f2 100755
--- a/run.sh
+++ b/run.sh
@@ -28,72 +28,79 @@ set -o pipefail
 [[ "${TORCH_NB_THREADS}" ]] || echo "You can set \$TORCH_NB_THREADS to the proper value (default 1)."
 [[ "${TORCH_USE_GPU}" ]] || echo "You can set \$TORCH_USE_GPU to 'yes' or 'no' (default 'no')."
 [[ "${DYNCNN_DATA_DIR}" ]] || DYNCNN_DATA_DIR="./data/10p-mg"
-[[ "${DYNCNN_RESULT_DIR}" ]] || DYNCNN_RESULT_DIR="./results"
+
+[[ "${DYNCNN_RUNDIR}" ]] || DYNCNN_RUNDIR="./results"
 
 ######################################################################
-# Create the data-set if needed
-
-if [[ -d "${DYNCNN_DATA_DIR}" ]]; then
-    echo "Found ${DYNCNN_DATA_DIR}, checking the number of images in there."
-    if [[ $(find "${DYNCNN_DATA_DIR}" -name "dyn_*.png" | wc -l) == 150000 ]]; then
-        echo "Looks good !"
-    else
-        echo "I do not find the proper number of images. Please remove the dir and re-run this scripts, or fix manually."
-        exit 1
-    fi
-else
-    # Creating the data-base
+# Create the data-set if the directory does not exist
+
+if [[ ! -d "${DYNCNN_DATA_DIR}" ]]; then
+    cat <<EOF
+***************************************************************************
+                          Generate the data-set
+***************************************************************************
+EOF
+
     make -j -k
     mkdir -p "${DYNCNN_DATA_DIR}"
-    ./flatland 50000 \
-               --every_nth 4 --nb_frames 5 \
-               --multi_grasp --nb_shapes 10 \
+    # 17 frames every 16 is two frames: t+0, t+16
+    ./flatland 40000 \
+               --nb_shapes 10 \
+               --multi_grasp --every_nth 16 --nb_frames 17 \
                --dir "${DYNCNN_DATA_DIR}"
 fi
 
 ######################################################################
-# Train the model (~30h on a GTX1080)
-
-if [[ ! -f "${DYNCNN_RESULT_DIR}"/epoch_01000_model ]]; then
-    ./dyncnn.lua --heavy --dataDir "${DYNCNN_DATA_DIR}" \
-                 --resultFreq 100 \
-                 --resultDir "${DYNCNN_RESULT_DIR}" \
-                 --nbEpochs 1000
+# Train the model (takes 15h on a GTX 1080 with cuda 8.0, cudnn 5.1,
+# and recent torch)
+
+if [[ ! -f "${DYNCNN_RUNDIR}"/model_1000.t7 ]]; then
+    cat <<EOF
+***************************************************************************
+                  Train the model (should take a while)
+***************************************************************************
+EOF
+    ./dyncnn.lua -rundir "${DYNCNN_RUNDIR}"
 fi
 
 ######################################################################
-# Create the images of internal activations
+# Create the images of internal activations using the model_last.t7
+# in the rundir
+
+cat <<EOF
+***************************************************************************
+                   Save the internal activation images
+***************************************************************************
+EOF
 
 for n in 2 12; do
-    ./dyncnn.lua --heavy --dataDir ./data/10p-mg/ \
-                 --learningStateFile "${DYNCNN_RESULT_DIR}"/epoch_01000_model \
-                 --resultDir "${DYNCNN_RESULT_DIR}" \
-                 --noLog \
-                 --exampleInternals ${n}
+    ./dyncnn.lua -rundir "${DYNCNN_RUNDIR}" -noLog -exampleInternals "${n}"
 done
 
 ######################################################################
 # Plot the loss curves if gnuplot is here
 
 if [[ $(which gnuplot) ]]; then
-    echo "Plotting losses.pdf."
+    cat <<EOF
+***************************************************************************
+                           Plot the loss curves
+***************************************************************************
+EOF
 
     TERMINAL="pdfcairo color transparent enhanced font \"Times,14\""
     EXTENSION="pdf"
 
     gnuplot <<EOF
 set terminal ${TERMINAL}
-set output "${DYNCNN_RESULT_DIR}/losses.${EXTENSION}"
+set output "${DYNCNN_RUNDIR}/losses.${EXTENSION}"
 set logscale x
 set logscale y
 set size ratio 0.75
 set xlabel "Number of epochs"
 set ylabel "Loss"
-plot '< grep "LOSS " "${DYNCNN_RESULT_DIR}"/log' using 2:4 with l lw 3 lc rgb '#c0c0ff' title 'Validation loss',\
-     '< grep "LOSS " "${DYNCNN_RESULT_DIR}"/log' using 2:3 with l lw 1 lc rgb '#000000' title 'Train loss'
+plot '< grep "LOSS " "${DYNCNN_RUNDIR}"/log' using 4:6 with l lw 3 lc rgb '#c0c0ff' title 'Validation loss',\
+     '< grep "LOSS " "${DYNCNN_RUNDIR}"/log' using 4:5 with l lw 1 lc rgb '#000000' title 'Train loss'
 
 EOF
 
 fi
-
-######################################################################
diff --git a/universe.cc b/universe.cc
index 658c32c..2b1383d 100644
--- a/universe.cc
+++ b/universe.cc
@@ -138,37 +138,6 @@ Polygon *Universe::pick_polygon(scalar_t x, scalar_t y) {
   return 0;
 }
 
-#ifdef XFIG_SUPPORT
-void Universe::print_xfig(XFigTracer *tracer) {
-  for(int n = 0; n < _nb_polygons; n++) {
-    if(_polygons[n]) {
-      _polygons[n]->color_xfig(tracer);
-    }
-  }
-  for(int n = 0; n < _nb_polygons; n++) {
-    if(_polygons[n]) {
-      _polygons[n]->print_xfig(tracer);
-    }
-  }
-}
-#endif
-
-#ifdef X11_SUPPORT
-void Universe::draw(SimpleWindow *window) {
-  for(int n = 0; n < _nb_polygons; n++) {
-    if(_polygons[n]) {
-      _polygons[n]->draw(window);
-    }
-  }
-
-  for(int n = 0; n < _nb_polygons; n++) {
-    if(_polygons[n]) {
-      _polygons[n]->draw_contours(window);
-    }
-  }
-}
-#endif
-
 void Universe::draw(Canvas *canvas) {
   for(int n = 0; n < _nb_polygons; n++) {
     if(_polygons[n]) {
diff --git a/universe.h b/universe.h
index 6cb4193..11dacc7 100644
--- a/universe.h
+++ b/universe.h
@@ -32,10 +32,6 @@
 #include "canvas.h"
 #include "polygon.h"
 
-#ifdef X11_SUPPORT
-#include "simple_window.h"
-#endif
-
 using namespace std;
 
 class Universe {
@@ -64,14 +60,6 @@ public:
 
   Polygon *pick_polygon(scalar_t x, scalar_t y);
 
-#ifdef XFIG_SUPPORT
-  void print_xfig(XFigTracer *tracer);
-#endif
-
-#ifdef X11_SUPPORT
-  void draw(SimpleWindow *window);
-#endif
-
   void draw(Canvas *canvas);
 };
 
-- 
2.20.1