#!/usr/bin/env luajit

--[[

   dyncnn is a deep-learning algorithm for the prediction of
   interacting object dynamics

   Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/
   Written by Francois Fleuret

   This file is part of dyncnn.

   dyncnn is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License version 3 as
   published by the Free Software Foundation.

   dyncnn is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with dyncnn. If not, see <http://www.gnu.org/licenses/>.

]]--

require 'torch'
require 'nn'
require 'optim'
require 'image'
require 'sys'

require 'img'

----------------------------------------------------------------------

function printf(f, ...)
   print(string.format(f, unpack({...})))
end

colors = sys.COLORS

function printfc(c, f, ...)
   print(c .. string.format(f, unpack({...})) .. colors.black)
end

function logCommand(c)
   print(colors.blue .. '[' .. c .. '] -> [' .. sys.execute(c) .. ']' .. colors.black)
end

----------------------------------------------------------------------
-- Environment variables

local defaultNbThreads = 1
local defaultUseGPU = false

if os.getenv('TORCH_NB_THREADS') then
   defaultNbThreads = os.getenv('TORCH_NB_THREADS')
   print('Environment variable TORCH_NB_THREADS is set and equal to ' .. defaultNbThreads)
else
   print('Environment variable TORCH_NB_THREADS is not set, default is ' .. defaultNbThreads)
end

if os.getenv('TORCH_USE_GPU') then
   defaultUseGPU = os.getenv('TORCH_USE_GPU') == 'yes'
   print('Environment variable TORCH_USE_GPU is set and evaluated as ' .. tostring(defaultUseGPU))
else
   print('Environment variable TORCH_USE_GPU is not set, default is ' .. tostring(defaultUseGPU))
end
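-- Example invocation (hypothetical script name and paths), overriding
-- both environment defaults from the shell:
--
--   TORCH_NB_THREADS=4 TORCH_USE_GPU=yes ./dyncnn.lua -rundir ./results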
----------------------------------------------------------------------
-- Command line arguments

local cmd = torch.CmdLine()

cmd:text('General setup')

cmd:option('-seed', 1, 'initial random seed')
cmd:option('-nbThreads', defaultNbThreads, 'how many threads (environment variable TORCH_NB_THREADS)')
cmd:option('-useGPU', defaultUseGPU, 'should we use cuda (environment variable TORCH_USE_GPU)')

cmd:text('')
cmd:text('Log')

cmd:option('-resultFreq', 100, 'at which epoch frequency should we save result images')
cmd:option('-exampleInternals', '', 'list of comma-separated indices for inner activation images')
cmd:option('-noLog', false, 'should we prevent logging')
cmd:option('-rundir', '', 'the directory for results')
cmd:option('-deltaImages', false, 'should we highlight the difference in result images')

cmd:text('')
cmd:text('Network structure')

cmd:option('-filterSize', 5)
cmd:option('-nbChannels', 16)
cmd:option('-nbBlocks', 8)

cmd:text('')
cmd:text('Training')

cmd:option('-nbEpochs', 1000, 'nb of epochs for the heavy setting')
cmd:option('-learningRate', 0.1, 'learning rate')
cmd:option('-batchSize', 128, 'size of the mini-batches')
cmd:option('-nbTrainSamples', 32768)
cmd:option('-nbValidationSamples', 1024)
cmd:option('-nbTestSamples', 1024)

cmd:text('')
cmd:text('Problem to solve')

cmd:option('-dataDir', './data/10p-mg', 'data directory')

------------------------------
-- Log and stuff

cmd:addTime('DYNCNN','%F %T')

params = cmd:parse(arg)

if params.rundir == '' then
   params.rundir = cmd:string('exp', params, { })
end

paths.mkdir(params.rundir)

if not params.noLog then
   -- Append to the log if there is one
   cmd:log(io.open(params.rundir .. '/log', 'a'), params)
end

----------------------------------------------------------------------
-- The experiment per se

if params.predictGrasp then
   params.targetDepth = 2
else
   params.targetDepth = 1
end

----------------------------------------------------------------------
-- Initializations

torch.setnumthreads(params.nbThreads)
torch.setdefaulttensortype('torch.FloatTensor')
torch.manualSeed(params.seed)

----------------------------------------------------------------------
-- Dealing with the CPU/GPU

-- mynn will take entries in that order: mynn, cudnn, cunn, nn

mynn = {}

setmetatable(mynn,
             {
                __index = function(table, key)
                   return (cudnn and cudnn[key])
                      or (cunn and cunn[key])
                      or nn[key]
                end
             }
)

-- These are the tensors that can be kept on the CPU
mynn.SlowTensor = torch.Tensor

-- These are the tensors that should be moved to the GPU
mynn.FastTensor = torch.Tensor

if params.useGPU then
   require 'cutorch'
   require 'cunn'
   require 'cudnn'
   cudnn.benchmark = true
   cudnn.fastest = true
   mynn.FastTensor = torch.CudaTensor
end

----------------------------------------------------------------------
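-- The loader below assumes each dyn_%06d.png frame is a 2x2 grid (or
-- larger) of 64x64 tiles: the two input channels are copied from the
-- top-right and top-left tiles, and the target from the bottom-right
-- tile. Pixel intensities are inverted and the color channels are
-- collapsed with a max before the tiles are sliced out.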
function loadData(first, nb, name)
   print('Loading data `' .. name .. '\'.')

   local data = {}

   data.name = name
   data.nbSamples = nb
   data.width = 64
   data.height = 64

   data.input = mynn.SlowTensor(data.nbSamples, 2, data.height, data.width)
   data.target = mynn.SlowTensor(data.nbSamples, 1, data.height, data.width)

   for i = 1, data.nbSamples do
      local n = i - 1 + first - 1
      local frame = image.load(string.format('%s/%03d/dyn_%06d.png',
                                             params.dataDir,
                                             math.floor(n/1000), n))

      frame:mul(-1.0):add(1.0)
      frame = frame:max(1):select(1, 1)

      data.input[i][1]:copy(frame:sub(0 * data.height + 1, 1 * data.height,
                                      1 * data.width + 1, 2 * data.width))

      data.input[i][2]:copy(frame:sub(0 * data.height + 1, 1 * data.height,
                                      0 * data.width + 1, 1 * data.width))

      data.target[i][1]:copy(frame:sub(1 * data.height + 1, 2 * data.height,
                                       1 * data.width + 1, 2 * data.width))
   end

   return data
end

----------------------------------------------------------------------

function collectAllOutputs(model, collection, which)
   if torch.type(model) == 'nn.Sequential' then
      for i = 1, #model.modules do
         collectAllOutputs(model.modules[i], collection, which)
      end
   elseif not which or which[torch.type(model)] then
      if torch.isTensor(model.output) then
         collection.nb = collection.nb + 1
         collection.outputs[collection.nb] = model.output
      end
   end
end

function saveInternalsImage(model, data, n)
   -- Explicitly copy to keep input as a mynn.FastTensor
   local input = mynn.FastTensor(1, 2, data.height, data.width)
   input:copy(data.input:narrow(1, n, 1))

   local output = model:forward(input)

   local collection = {}
   collection.outputs = {}
   collection.nb = 1
   collection.outputs[collection.nb] = input

   collectAllOutputs(model, collection,
                     {
                        ['nn.ReLU'] = true,
                        ['cunn.ReLU'] = true,
                        ['cudnn.ReLU'] = true,
                     }
   )

   if collection.outputs[collection.nb] ~= model.output then
      collection.nb = collection.nb + 1
      collection.outputs[collection.nb] = model.output
   end

   local fileName = string.format('%s/internals_%s_%06d.png',
                                  params.rundir,
                                  data.name, n)

   print('Saving ' .. fileName)
   image.save(fileName, imageFromTensors(collection.outputs))
end

----------------------------------------------------------------------

function highlightImage(a, b)
   if params.deltaImages then
      local h = torch.csub(a, b):abs()
      h:div(h:max()):mul(0.9):add(0.1)
      return torch.cmul(a, h)
   else
      return a
   end
end

function saveResultImage(model, data, nbMax)
   local criterion = nn.MSECriterion()

   if params.useGPU then
      print('Moving the criterion to the GPU.')
      criterion:cuda()
   end

   local input = mynn.FastTensor(1, 2, data.height, data.width)
   local target = mynn.FastTensor(1, 1, data.height, data.width)

   local nbMax = nbMax or 50
   local nb = math.min(nbMax, data.nbSamples)

   model:evaluate()

   printf('Write %d result images for `%s\'.', nb, data.name)

   local lossFile = io.open(params.rundir .. '/result_' .. data.name .. '_losses.dat', 'w')

   for n = 1, nb do
      -- Explicitly copy to keep input as a mynn.FastTensor
      input:copy(data.input:narrow(1, n, 1))
      target:copy(data.target:narrow(1, n, 1))

      local output = model:forward(input)
      local loss = criterion:forward(output, target)

      output = mynn.SlowTensor(output:size()):copy(output)

      -- We use our magical img.lua to create the result images
      local comp

      comp = {
         {
            vertical = true,
            { pad = 1, data.input[n][1] },
            { pad = 1, data.input[n][2] },
            { pad = 1, highlightImage(data.target[n][1], data.input[n][1]) },
            { pad = 1, highlightImage(output[1][1], data.input[n][1]) },
         }
      }

      local result = combineImages(1.0, comp)

      result:mul(-1.0):add(1.0)

      local fileName = string.format('result_%s_%06d.png', data.name, n)
      image.save(params.rundir .. '/' .. fileName, result)
      lossFile:write(string.format('%f %s\n', loss, fileName))
   end

   lossFile:close()
end

----------------------------------------------------------------------
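-- createTower builds the residual trunk of the network: each block is
-- conv -> batchnorm -> ReLU -> conv, added back to its own input
-- through a ConcatTable/CAddTable pair, and followed by batchnorm and
-- ReLU. With -nbBlocks 0 the tower degenerates to an identity mapping.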
function createTower(filterSize, nbChannels, nbBlocks)

   local tower

   if nbBlocks == 0 then

      tower = nn.Identity()

   else

      tower = mynn.Sequential()

      for b = 1, nbBlocks do
         local block = mynn.Sequential()

         block:add(mynn.SpatialConvolution(nbChannels,
                                           nbChannels,
                                           filterSize, filterSize,
                                           1, 1,
                                           (filterSize - 1) / 2, (filterSize - 1) / 2))
         block:add(mynn.SpatialBatchNormalization(nbChannels))
         block:add(mynn.ReLU(true))
         block:add(mynn.SpatialConvolution(nbChannels,
                                           nbChannels,
                                           filterSize, filterSize,
                                           1, 1,
                                           (filterSize - 1) / 2, (filterSize - 1) / 2))

         local parallel = mynn.ConcatTable()
         parallel:add(block):add(mynn.Identity())

         tower:add(parallel):add(mynn.CAddTable(true))

         tower:add(mynn.SpatialBatchNormalization(nbChannels))
         tower:add(mynn.ReLU(true))
      end

   end

   return tower

end

function createModel(imageWidth, imageHeight,
                     filterSize, nbChannels, nbBlocks)

   local model = mynn.Sequential()

   -- Encode the two input channels (grasping image and starting
   -- configuration) into the internal number of channels
   model:add(mynn.SpatialConvolution(2,
                                     nbChannels,
                                     filterSize, filterSize,
                                     1, 1,
                                     (filterSize - 1) / 2, (filterSize - 1) / 2))

   model:add(mynn.SpatialBatchNormalization(nbChannels))
   model:add(mynn.ReLU(true))

   -- Add the resnet modules
   model:add(createTower(filterSize, nbChannels, nbBlocks))

   -- Decode down to a single channel, which is the final image
   model:add(mynn.SpatialConvolution(nbChannels,
                                     1,
                                     filterSize, filterSize,
                                     1, 1,
                                     (filterSize - 1) / 2, (filterSize - 1) / 2))

   return model
end

----------------------------------------------------------------------

function fillBatch(data, first, batch, permutation)
   local actualBatchSize = math.min(params.batchSize,
                                    data.input:size(1) - first + 1)

   if actualBatchSize ~= batch.input:size(1) then
      local size = batch.input:size()
      size[1] = actualBatchSize
      batch.input:resize(size)
   end

   if actualBatchSize ~= batch.target:size(1) then
      local size = batch.target:size()
      size[1] = actualBatchSize
      batch.target:resize(size)
   end

   for k = 1, batch.input:size(1) do
      local i
      if permutation then
         i = permutation[first + k - 1]
      else
         i = first + k - 1
      end
      batch.input[k] = data.input[i]
      batch.target[k] = data.target[i]
   end
end

function trainModel(model, trainSet, validationSet)

   local criterion = nn.MSECriterion()
   local batchSize = params.batchSize

   local batch = {}
   batch.input = mynn.FastTensor(batchSize, 2, trainSet.height, trainSet.width)
   batch.target = mynn.FastTensor(batchSize, 1, trainSet.height, trainSet.width)

   local startingEpoch = 1

   if model.epoch then
      startingEpoch = model.epoch + 1
   end

   if model.RNGState then
      printfc(colors.red, 'Using the RNG state from the loaded model.')
      torch.setRNGState(model.RNGState)
   end

   if params.useGPU then
      print('Moving the model and criterion to the GPU.')
      model:cuda()
      criterion:cuda()
   end

   print('Starting training.')

   local parameters, gradParameters = model:getParameters()

   printf('The model has %d parameters.', parameters:storage():size(1))

   local averageTrainLoss, averageValidationLoss
   local trainTime, validationTime

   ----------------------------------------------------------------------

   local sgdState = {
      learningRate = params.learningRate,
      momentum = 0,
      learningRateDecay = 0
   }
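   -- Main training loop: for every mini-batch, the opfunc closure
   -- evaluates the MSE loss and its gradient so that optim.sgd can
   -- update `parameters` in place; the same closure accumulates the
   -- average training loss reported at the end of the epoch.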
   for e = startingEpoch, params.nbEpochs do

      model:training()

      local permutation = torch.randperm(trainSet.nbSamples)

      local accLoss = 0.0
      local nbBatches = 0
      local startTime = sys.clock()

      for b = 1, trainSet.nbSamples, batchSize do

         fillBatch(trainSet, b, batch, permutation)

         local opfunc = function(x)
            -- Surprisingly, copy() needs this check
            if x ~= parameters then
               parameters:copy(x)
            end

            local output = model:forward(batch.input)

            local loss = criterion:forward(output, batch.target)
            local dLossdOutput = criterion:backward(output, batch.target)

            gradParameters:zero()
            model:backward(batch.input, dLossdOutput)

            accLoss = accLoss + loss
            nbBatches = nbBatches + 1

            return loss, gradParameters
         end

         optim.sgd(opfunc, parameters, sgdState)

      end

      trainTime = sys.clock() - startTime
      averageTrainLoss = accLoss / nbBatches

      ----------------------------------------------------------------------
      -- Validation losses

      do
         model:evaluate()

         local accLoss = 0.0
         local nbBatches = 0
         local startTime = sys.clock()

         for b = 1, validationSet.nbSamples, batchSize do
            fillBatch(validationSet, b, batch)
            local output = model:forward(batch.input)
            accLoss = accLoss + criterion:forward(output, batch.target)
            nbBatches = nbBatches + 1
         end

         validationTime = sys.clock() - startTime
         averageValidationLoss = accLoss / nbBatches
      end

      ----------------------------------------------------------------------

      printfc(colors.green,
              'epoch %d acc_train_loss %f validation_loss %f [train %.02fs total %.02fms / sample, validation %.02fs total %.02fms / sample]',
              e,
              averageTrainLoss,
              averageValidationLoss,
              trainTime,
              1000 * trainTime / trainSet.nbSamples,
              validationTime,
              1000 * validationTime / validationSet.nbSamples
      )

      ----------------------------------------------------------------------
      -- Save a persistent state so that we can restart from there

      model:clearState()
      model.RNGState = torch.getRNGState()
      model.epoch = e
      torch.save(params.rundir .. '/model_last.t7', model)

      ----------------------------------------------------------------------
      -- Save a duplicate of the persistent state from time to time

      if params.resultFreq > 0 and e % params.resultFreq == 0 then
         torch.save(string.format('%s/model_%04d.t7', params.rundir, e), model)
         saveResultImage(model, trainSet)
         saveResultImage(model, validationSet)
      end

   end

end

function createAndTrainModel(trainSet, validationSet)

   -- Load the current training state, or create a new model from
   -- scratch

   if pcall(function () model = torch.load(params.rundir .. '/model_last.t7') end) then

      printfc(colors.red,
              'Found a model with %d epochs completed, starting from there.',
              model.epoch)

      if params.exampleInternals ~= '' then
         for _, i in ipairs(string.split(params.exampleInternals, ',')) do
            saveInternalsImage(model, validationSet, tonumber(i))
         end
         os.exit(0)
      end

   else

      model = createModel(trainSet.width, trainSet.height,
                          params.filterSize, params.nbChannels,
                          params.nbBlocks)

   end

   trainModel(model, trainSet, validationSet)

   return model

end

----------------------------------------------------------------------
-- main

for _, c in pairs({
      'date',
      'uname -a',
      'git log -1 --format=%H'
}) do
   logCommand(c)
end

local trainSet = loadData(1, params.nbTrainSamples, 'train')
local validationSet = loadData(params.nbTrainSamples + 1, params.nbValidationSamples, 'validation')

local model = createAndTrainModel(trainSet, validationSet)

----------------------------------------------------------------------
-- Test

local testSet = loadData(params.nbTrainSamples + params.nbValidationSamples + 1,
                         params.nbTestSamples, 'test')

if params.useGPU then
   print('Moving the model and criterion to the GPU.')
   model:cuda()
end

saveResultImage(model, trainSet)
saveResultImage(model, validationSet)
saveResultImage(model, testSet, 1024)
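----------------------------------------------------------------------
-- Files written to params.rundir during a run: `log`, `model_last.t7`
-- (plus `model_XXXX.t7` snapshots every -resultFreq epochs),
-- `result_<set>_NNNNNN.png` images with their losses in
-- `result_<set>_losses.dat`, and `internals_<set>_NNNNNN.png` when
-- -exampleInternals is given.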