#!/usr/bin/env luajit

--[[

   dyncnn is a deep-learning algorithm for the prediction of
   interacting object dynamics

   Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/
   Written by Francois Fleuret

   This file is part of dyncnn.

   dyncnn is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License version 3 as
   published by the Free Software Foundation.

   dyncnn is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with dyncnn.  If not, see <http://www.gnu.org/licenses/>.

]]--

require 'torch'
require 'nn'
require 'optim'
require 'image'

require 'fftb'

----------------------------------------------------------------------
-- Command line arguments

-- NOTE(review): defaultNbThreads, defaultUseGPU, fftbInit, logCommand,
-- ffnn, fillBatch, printf, printfc, colors, imageFromTensors and
-- combineImages are not defined in this file; presumably they are
-- globals set up by the 'fftb' module (and the img.lua it mentions).

local cmd = torch.CmdLine()

cmd:text('General setup')

cmd:option('-seed', 1, 'initial random seed')
cmd:option('-nbThreads', defaultNbThreads, 'how many threads (environment variable TORCH_NB_THREADS)')
cmd:option('-useGPU', defaultUseGPU, 'should we use cuda (environment variable TORCH_USE_GPU)')
cmd:option('-fastGPU', true, 'should we go as fast as possible, possibly non-deterministically')

cmd:text('')
cmd:text('Log')

cmd:option('-resultFreq', 100, 'at which epoch frequency should we save result images')
cmd:option('-exampleInternals', '', 'list of comma-separated indices for inner activation images')
cmd:option('-noLog', false, 'should we prevent logging')
cmd:option('-rundir', '', 'the directory for results')
cmd:option('-deltaImages', false, 'should we highlight the difference in result images')

cmd:text('')
cmd:text('Network structure')

cmd:option('-filterSize', 5)
cmd:option('-nbChannels', 16)
cmd:option('-nbBlocks', 8)

cmd:text('')
cmd:text('Training')

cmd:option('-nbEpochs', 1000, 'nb of epochs for the heavy setting')
cmd:option('-learningRate', 0.1, 'learning rate')
cmd:option('-batchSize', 128, 'size of the mini-batches')
cmd:option('-nbTrainSamples', 32768)
cmd:option('-nbValidationSamples', 1024)
cmd:option('-nbTestSamples', 1024)

cmd:text('')
cmd:text('Problem to solve')

cmd:option('-dataDir', './data/10p-mg', 'data directory')

cmd:addTime('DYNCNN','%F %T')

-- Deliberately global: every function below reads it, and code
-- outside this file may read it as well.
params = cmd:parse(arg)

----------------------------------------------------------------------

fftbInit(cmd, params)

-- ipairs guarantees the commands are logged in the listed order
for _, c in ipairs({ 'date', 'uname -a', 'git log -1 --format=%H' }) do
   logCommand(c)
end

----------------------------------------------------------------------

-- Loads `nb` consecutive samples, the first one having the 1-based
-- index `first`, from the PNG files under params.dataDir.
--
-- Each file is read as a 2x2 grid of 64x64 tiles: the top-right tile
-- becomes input channel 1, the top-left tile input channel 2 and the
-- bottom-right tile the target. The bottom-left tile is not used.
--
-- Returns a table with the fields name, nbSamples, width, height,
-- input (nb x 2 x height x width) and target (nb x 1 x height x width).

function loadData(first, nb, name)
   print('Loading data `' .. name .. '\'.')

   local data = {}

   data.name = name
   data.nbSamples = nb
   data.width = 64
   data.height = 64
   data.input = ffnn.SlowTensor(data.nbSamples, 2, data.height, data.width)
   data.target = ffnn.SlowTensor(data.nbSamples, 1, data.height, data.width)

   local h, w = data.height, data.width

   for i = 1, data.nbSamples do
      -- Files are numbered from 0 and stored by groups of 1000 in
      -- sub-directories
      local n = i-1 + first-1
      local fileName = string.format('%s/%03d/dyn_%06d.png',
                                     params.dataDir, math.floor(n/1000), n)
      local frame = image.load(fileName)

      -- Invert the intensities (x -> 1 - x), then collapse the first
      -- dimension by taking the maximum over it
      frame:mul(-1.0):add(1.0)
      frame = frame:max(1):select(1, 1)

      data.input[i][1]:copy(frame:sub(0 * h + 1, 1 * h, 1 * w + 1, 2 * w))
      data.input[i][2]:copy(frame:sub(0 * h + 1, 1 * h, 0 * w + 1, 1 * w))
      data.target[i][1]:copy(frame:sub(1 * h + 1, 2 * h, 1 * w + 1, 2 * w))
   end

   return data
end

----------------------------------------------------------------------

-- Appends to collection.outputs the tensor-valued `output` field of
-- the modules of `model`, and keeps the count in collection.nb.
--
-- nn.Sequential containers are traversed recursively. Any other
-- module is kept only if `which` is nil or has the module type name
-- as a key with a true value.

function collectAllOutputs(model, collection, which)
   if torch.type(model) == 'nn.Sequential' then
      for i = 1, #model.modules do
         collectAllOutputs(model.modules[i], collection, which)
      end
   elseif not which or which[torch.type(model)] then
      if torch.isTensor(model.output) then
         collection.nb = collection.nb + 1
         collection.outputs[collection.nb] = model.output
      end
   end
end

-- Runs `model` on sample `n` of `data` and saves one image showing
-- the input, the outputs of the ReLU layers and the final output, in
-- params.rundir/internals_<data.name>_<n>.png

function saveInternalsImage(model, data, n)
   -- Explicitly copy to keep input as a ffnn.FastTensor
   local input = ffnn.FastTensor(1, 2, data.height, data.width)
   input:copy(data.input:narrow(1, n, 1))

   -- Only the side effect matters here: the forward pass fills the
   -- `output` field of every module
   model:forward(input)

   local collection = {}
   collection.outputs = {}
   collection.nb = 1
   collection.outputs[collection.nb] = input

   collectAllOutputs(model, collection,
                     {
                        ['nn.ReLU'] = true,
                        ['cunn.ReLU'] = true,
                        ['cudnn.ReLU'] = true,
                     }
   )

   -- Add the final output, unless it was collected already
   if collection.outputs[collection.nb] ~= model.output then
      collection.nb = collection.nb + 1
      collection.outputs[collection.nb] = model.output
   end

   local fileName = string.format('%s/internals_%s_%06d.png', params.rundir, data.name, n)

   print('Saving ' .. fileName)
   image.save(fileName, imageFromTensors(collection.outputs))
end

----------------------------------------------------------------------

-- If params.deltaImages is set, returns a copy of `a` in which every
-- pixel is weighted by a factor in [0.1, 1.0] that grows with
-- |a - b|, so that the parts where the two images differ stand out.
-- Otherwise returns `a` itself.

function highlightImage(a, b)
   if params.deltaImages then
      local h = torch.csub(a, b):abs()
      local hMax = h:max()

      -- Normalize the difference to [0, 1]. The previous code did
      -- h:div(1/h:max()), which multiplies by the maximum instead of
      -- dividing by it. When the two images are identical h is zero
      -- everywhere and is left as is, to avoid a 0/0.
      if hMax > 0 then
         h:div(hMax)
      end

      h:mul(0.9):add(0.1)

      return torch.cmul(a, h)
   else
      return a
   end
end

-- Writes in params.rundir one result image for each of the first
-- min(nbMax, data.nbSamples) samples of `data`, together with a file
-- result_<data.name>_losses.dat listing the MSE loss and the image
-- file name of each of them.
--
-- nbMax is optional and defaults to 50. The model is switched to
-- evaluation mode.

function saveResultImage(model, data, nbMax)
   local criterion = nn.MSECriterion()

   if params.useGPU then
      print('Moving the criterion to the GPU.')
      criterion:cuda()
   end

   local input = ffnn.FastTensor(1, 2, data.height, data.width)
   local target = ffnn.FastTensor(1, 1, data.height, data.width)

   local nb = math.min(nbMax or 50, data.nbSamples)

   model:evaluate()

   printf('Write %d result images for `%s\'.', nb, data.name)

   -- Fail right away with the system error message if the file cannot
   -- be created
   local lossFile = assert(io.open(params.rundir .. '/result_' .. data.name .. '_losses.dat', 'w'))

   for n = 1, nb do

      -- Explicitly copy to keep input as a ffnn.FastTensor
      input:copy(data.input:narrow(1, n, 1))
      target:copy(data.target:narrow(1, n, 1))

      local output = model:forward(input)
      local loss = criterion:forward(output, target)

      -- Bring the output back into a ffnn.SlowTensor, like the data
      output = ffnn.SlowTensor(output:size()):copy(output)

      -- We use our magical img.lua to create the result images

      local comp = {
         {
            vertical = true,
            { pad = 1, data.input[n][1] },
            { pad = 1, data.input[n][2] },
            { pad = 1, highlightImage(data.target[n][1], data.input[n][1]) },
            { pad = 1, highlightImage(output[1][1], data.input[n][1]) },
         }
      }

      local result = combineImages(1.0, comp)

      -- Invert the intensities back before saving
      result:mul(-1.0):add(1.0)

      local fileName = string.format('result_%s_%06d.png', data.name, n)
      image.save(params.rundir .. '/' .. fileName, result)
      lossFile:write(string.format('%f %s\n', loss, fileName))
   end

   -- The file used to be left open
   lossFile:close()
end

----------------------------------------------------------------------

-- Creates a sequence of nbBlocks residual blocks operating on
-- nbChannels channels. Each block computes
--
--   x -> ReLU(BN(x + conv(ReLU(BN(conv(x))))))
--
-- Returns a nn.Identity if nbBlocks is 0.
--
-- The padding (filterSize - 1) / 2 is an integer only if filterSize
-- is odd.

function createTower(filterSize, nbChannels, nbBlocks)
   local tower

   if nbBlocks == 0 then

      tower = nn.Identity()

   else

      local padding = (filterSize - 1) / 2

      tower = ffnn.Sequential()

      for b = 1, nbBlocks do
         local block = ffnn.Sequential()

         block:add(ffnn.SpatialConvolution(nbChannels, nbChannels,
                                           filterSize, filterSize,
                                           1, 1,
                                           padding, padding))
         block:add(ffnn.SpatialBatchNormalization(nbChannels))
         block:add(ffnn.ReLU(true))

         block:add(ffnn.SpatialConvolution(nbChannels, nbChannels,
                                           filterSize, filterSize,
                                           1, 1,
                                           padding, padding))

         -- Residual branch and identity branch, summed right after
         local parallel = ffnn.ConcatTable()
         parallel:add(block):add(ffnn.Identity())

         tower:add(parallel):add(ffnn.CAddTable(true))

         tower:add(ffnn.SpatialBatchNormalization(nbChannels))
         tower:add(ffnn.ReLU(true))
      end

   end

   return tower
end

-- Creates the full network: an encoding convolution from 2 to
-- nbChannels channels, the residual tower, and a decoding convolution
-- down to 1 channel.
--
-- imageWidth and imageHeight are not used, they are kept for the
-- callers.

function createModel(imageWidth, imageHeight,
                     filterSize, nbChannels, nbBlocks)

   local padding = (filterSize - 1) / 2

   local model = ffnn.Sequential()

   -- Encode the two input channels (grasping image and starting
   -- configuration) into the internal number of channels
   model:add(ffnn.SpatialConvolution(2, nbChannels,
                                     filterSize, filterSize,
                                     1, 1,
                                     padding, padding))

   model:add(ffnn.SpatialBatchNormalization(nbChannels))
   model:add(ffnn.ReLU(true))

   -- Add the resnet modules
   model:add(createTower(filterSize, nbChannels, nbBlocks))

   -- Decode down to a single channel, which is the final image
   model:add(ffnn.SpatialConvolution(nbChannels, 1,
                                     filterSize, filterSize,
                                     1, 1,
                                     padding, padding))

   return model
end

----------------------------------------------------------------------

-- Trains `model` with plain SGD and a MSE loss up to epoch
-- params.nbEpochs.
--
-- If the model has an `epoch` field, training resumes at the next
-- epoch, and if it has a `RNGState` field, that random generator
-- state is restored first.
--
-- After every epoch the train and validation losses are printed and
-- the model is saved in params.rundir/model_last.t7. Every
-- params.resultFreq epochs, a numbered copy of the model and result
-- images for both sets are saved too.

function trainModel(model, trainSet, validationSet)

   local criterion = nn.MSECriterion()
   local batchSize = params.batchSize

   local startingEpoch = 1

   if model.epoch then
      startingEpoch = model.epoch + 1
   end

   if model.RNGState then
      printfc(colors.red, 'Using the RNG state from the loaded model.')
      torch.setRNGState(model.RNGState)
   end

   if params.useGPU then
      print('Moving the model and criterion to the GPU.')
      model:cuda()
      criterion:cuda()
   end

   print('Starting training.')

   local parameters, gradParameters = model:getParameters()
   printf('The model has %d parameters.', parameters:storage():size(1))

   local averageTrainLoss, averageValidationLoss
   local trainTime, validationTime

   ----------------------------------------------------------------------

   local sgdState = {
      learningRate = params.learningRate,
      momentum = 0,
      learningRateDecay = 0
   }

   -- Filled again by fillBatch for every mini-batch
   local batch = {}

   for e = startingEpoch, params.nbEpochs do

      model:training()

      -- The train samples are visited in a new random order at every
      -- epoch
      local permutation = torch.randperm(trainSet.nbSamples)

      local accLoss = 0.0
      local nbBatches = 0
      local startTime = sys.clock()

      for b = 1, trainSet.nbSamples, batchSize do

         fillBatch(trainSet, b, batch, permutation)

         -- Computes the loss and its gradient on the current batch,
         -- and accumulates the loss for the epoch statistics
         local opfunc = function(x)
            -- Surprisingly, copy() needs this check
            if x ~= parameters then
               parameters:copy(x)
            end

            local output = model:forward(batch.input)
            local loss = criterion:forward(output, batch.target)

            local dLossdOutput = criterion:backward(output, batch.target)
            gradParameters:zero()
            model:backward(batch.input, dLossdOutput)

            accLoss = accLoss + loss
            nbBatches = nbBatches + 1

            return loss, gradParameters
         end

         optim.sgd(opfunc, parameters, sgdState)

      end

      trainTime = sys.clock() - startTime
      averageTrainLoss = accLoss / nbBatches

      ----------------------------------------------------------------------
      -- Validation losses

      do
         model:evaluate()

         local accLoss = 0.0
         local nbBatches = 0
         local startTime = sys.clock()

         for b = 1, validationSet.nbSamples, batchSize do
            fillBatch(validationSet, b, batch)
            local output = model:forward(batch.input)
            accLoss = accLoss + criterion:forward(output, batch.target)
            nbBatches = nbBatches + 1
         end

         validationTime = sys.clock() - startTime
         averageValidationLoss = accLoss / nbBatches
      end

      ----------------------------------------------------------------------

      printfc(colors.green,

              'epoch %d acc_train_loss %f validation_loss %f [train %.02fs total %.02fms / sample, validation %.02fs total %.02fms / sample]',

              e,

              averageTrainLoss,

              averageValidationLoss,

              trainTime,
              1000 * trainTime / trainSet.nbSamples,

              validationTime,
              1000 * validationTime / validationSet.nbSamples
      )

      ----------------------------------------------------------------------
      -- Save a persistent state so that we can restart from there

      model:clearState()
      model.RNGState = torch.getRNGState()
      model.epoch = e
      torch.save(params.rundir .. '/model_last.t7', model)

      ----------------------------------------------------------------------
      -- Save a duplicate of the persistent state from time to time

      if params.resultFreq > 0 and e%params.resultFreq == 0 then
         torch.save(string.format('%s/model_%04d.t7', params.rundir, e), model)
         saveResultImage(model, trainSet)
         saveResultImage(model, validationSet)
      end

   end

end

----------------------------------------------------------------------
-- main

local trainSet = loadData(1,
                          params.nbTrainSamples, 'train')

local validationSet = loadData(params.nbTrainSamples + 1,
                               params.nbValidationSamples, 'validation')

local model

-- Restart from the last saved model if there is one. Any failure to
-- load it (typically a missing file) leads to a fresh model.
if pcall(function () model = torch.load(params.rundir .. '/model_last.t7') end) then

   printfc(colors.red,
           'Found a model with %d epochs completed, starting from there.',
           model.epoch)

   -- With -exampleInternals, only save the requested internal
   -- activation images and stop there
   if params.exampleInternals ~= '' then
      for _, i in ipairs(string.split(params.exampleInternals, ',')) do
         local index = tonumber(i)
         if not index then
            error('Invalid index `' .. i .. '\' in -exampleInternals.')
         end
         saveInternalsImage(model, validationSet, index)
      end
      os.exit(0)
   end

else

   model = createModel(trainSet.width, trainSet.height,
                       params.filterSize, params.nbChannels,
                       params.nbBlocks)

end

trainModel(model, trainSet, validationSet)

----------------------------------------------------------------------
-- Test

local testSet = loadData(params.nbTrainSamples + params.nbValidationSamples + 1,
                         params.nbTestSamples, 'test')

if params.useGPU then
   print('Moving the model and criterion to the GPU.')
   model:cuda()
end

saveResultImage(model, trainSet)
saveResultImage(model, validationSet)
saveResultImage(model, testSet, 1024)