mlp.cc

   1 /*
   2  *  mlp-mnist is an implementation of a multi-layer neural network.
   3  *
   4  *  Copyright (c) 2006 École Polytechnique Fédérale de Lausanne,
   5  *  http://www.epfl.ch
   6  *
   7  *  Written by Francois Fleuret <francois@fleuret.org>
   8  *
   9  *  This file is part of mlp-mnist.
  10  *
  11  *  mlp-mnist is free software: you can redistribute it and/or modify
  12  *  it under the terms of the GNU General Public License version 3 as
  13  *  published by the Free Software Foundation.
  14  *
  15  *  mlp-mnist is distributed in the hope that it will be useful, but
  16  *  WITHOUT ANY WARRANTY; without even the implied warranty of
  17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  *  General Public License for more details.
  19  *
  20  *  You should have received a copy of the GNU General Public License
  21  *  along with mlp-mnist.  If not, see <http://www.gnu.org/licenses/>.
  22  *
  23  */
  24
  25 // LeCun et al. 1998:
  26
  27 // 2-layer NN, 300 hidden units, mean square error  4.70%
  28 // 2-layer NN, 1000 hidden units                    4.50%
  29 // 3-layer NN, 300+100 hidden units                 3.05%
  30 // 3-layer NN, 500+150 hidden units                 2.95%
  31
  32 /*********************************************************************
  33
  34    This program, trained on 20,000 (+ 20,000 for the stopping
  35    criterion), tested on the 10,000 of the MNIST test set 100 hidden
  36    neurons, basic network, 3.48%
  37
  38    TRAINING
  39
  40     ./ann --nb-training-examples 20000 --nb-validation-examples 20000 \
  41         --mlp-structure 784,200,10 \
  42         --data-files ${DATA_DIR}/train-images-idx3-ubyte ${DATA_DIR}/train-labels-idx1-ubyte \
  43         --save-mlp simple.mlp
  44
  45    TEST
  46
  47     ./ann --load-mlp simple.mlp \
  48         --data-files ${DATA_DIR}/t10k-images-idx3-ubyte ${DATA_DIR}/t10k-labels-idx1-ubyte \
  49         --nb-test-examples 10000
  50
  51 *********************************************************************/
  52
  53 #include <iostream>
  54 #include <fstream>
  55 #include <cmath>
  56 #include <stdio.h>
  57 #include <stdlib.h>
  58 #include <string.h>
  59
  60 using namespace std;
  61
  62 #include "images.h"
  63 #include "neural.h"
  64
  65 #define SMALL_BUFFER_SIZE 1024
  66
  67 //////////////////////////////////////////////////////////////////////
  68 // Global Variables
  69 //////////////////////////////////////////////////////////////////////
  70
  71 int nb_experiment = 0;
  72 int nb_training_examples = 0;
  73 int nb_validation_examples = 0;
  74 int nb_test_examples = 0;
  75 bool save_data = false;
  76
  77 char images_filename[SMALL_BUFFER_SIZE] = "\0";
  78 char labels_filename[SMALL_BUFFER_SIZE] = "\0";
  79 char opt_load_filename[SMALL_BUFFER_SIZE] = "\0";
  80 char opt_save_filename[SMALL_BUFFER_SIZE] = "\0";
  81 char opt_layer_sizes[SMALL_BUFFER_SIZE] = "\0";
  82
  83 char *next_word(char *buffer, char *r, int buffer_size) {
  84   char *s;
  85   s = buffer;
  86   if(r != NULL)
  87     {
  88       if(*r == '"') {
  89         r++;
  90         while((*r != '"') && (*r != '\0') &&
  91               (s<buffer+buffer_size-1))
  92           *s++ = *r++;
  93         if(*r == '"') r++;
  94       } else {
  95         while((*r != '\r') && (*r != '\n') && (*r != '\0') &&
  96               (*r != '\t') && (*r != ' ') && (*r != ',') &&
  97               (s<buffer+buffer_size-1))
  98           *s++ = *r++;
  99       }
 100
 101       while((*r == ' ') || (*r == '\t') || (*r == ',')) r++;
 102       if((*r == '\0') || (*r=='\r') || (*r=='\n')) r = NULL;
 103     }
 104   *s = '\0';
 105   return r;
 106 }
 107
 108 //////////////////////////////////////////////////////////////////////
 109 // Simple routine to check we have enough parameters
 110 //////////////////////////////////////////////////////////////////////
 111
 112 void check_opt(int argc, char **argv, int n_opt, int n, const char *help) {
 113   if(n_opt + n >= argc) {
 114     cerr << "Missing argument for " << argv[n_opt] << ".\n";
 115     cerr << "Expecting " << help << ".\n";
 116     exit(1);
 117   }
 118 }
 119
 120 void print_help_and_exit(int e) {
 121   cout << "ANN. Written by François Fleuret.\n";
 122   cout << "$Id: ann.cc,v 1.1 2005-12-13 17:19:11 fleuret Exp $\n";
 123   cout<< "\n";
 124   exit(e);
 125 }
 126
 127 int main(int argc, char **argv) {
 128
 129   if(argc == 1) print_help_and_exit(1);
 130
 131   nice(10);
 132
 133   // Parsing the command line parameters ///////////////////////////////
 134
 135   int i = 1;
 136
 137   while(i < argc) {
 138
 139     if(argc == 1 || strcmp(argv[i], "--help") == 0) print_help_and_exit(0);
 140
 141     else if(strcmp(argv[i], "--data-files") == 0) {
 142       check_opt(argc, argv, i, 2, "<string: pixel filename> <string: label filename>");
 143       strncpy(images_filename, argv[i+1], SMALL_BUFFER_SIZE);
 144       strncpy(labels_filename, argv[i+2], SMALL_BUFFER_SIZE);
 145       i += 3;
 146     }
 147
 148     else if(strcmp(argv[i], "--load-mlp") == 0) {
 149       check_opt(argc, argv, i, 1, "<string: mlp filename>");
 150       strncpy(opt_load_filename, argv[i+1], SMALL_BUFFER_SIZE);
 151       i += 2;
 152     }
 153
 154     else if(strcmp(argv[i], "--mlp-structure") == 0) {
 155       check_opt(argc, argv, i, 1, "<int: input layer size>,<int: first hidden layer size>,[...,]<int: output layer size>");
 156       strncpy(opt_layer_sizes, argv[i+1], SMALL_BUFFER_SIZE);
 157       i += 2;
 158     }
 159
 160     else if(strcmp(argv[i], "--save-mlp") == 0) {
 161       check_opt(argc, argv, i, 1, "<string: mlp filename>");
 162       strncpy(opt_save_filename, argv[i+1], SMALL_BUFFER_SIZE);
 163       i += 2;
 164     }
 165
 166     else if(strcmp(argv[i], "--nb-experiment") == 0) {
 167       check_opt(argc, argv, i, 1, "<int: number of the experiment>");
 168       nb_experiment = atoi(argv[i+1]);
 169       i += 2;
 170     }
 171
 172     else if(strcmp(argv[i], "--nb-training-examples") == 0) {
 173       check_opt(argc, argv, i, 1, "<int: number of examples for the training>");
 174       nb_training_examples = atoi(argv[i+1]);
 175       i += 2;
 176     }
 177
 178     else if(strcmp(argv[i], "--nb-validation-examples") == 0) {
 179       check_opt(argc, argv, i, 1, "<int: number of examples for the validation>");
 180       nb_validation_examples = atoi(argv[i+1]);
 181       i += 2;
 182     }
 183
 184     else if(strcmp(argv[i], "--nb-test-examples") == 0) {
 185       check_opt(argc, argv, i, 1, "<int: number of examples for the test>");
 186       nb_test_examples = atoi(argv[i+1]);
 187       i += 2;
 188     }
 189
 190     else if(strcmp(argv[i], "--save-data") == 0) {
 191       save_data = true;
 192       i++;
 193     }
 194
 195     else {
 196       cerr << "Unknown option " << argv[i] << "\n";
 197       print_help_and_exit(1);
 198     }
 199   }
 200
 201   ImageSet image_set;
 202   cout << "Loading the data file ..."; cout.flush();
 203   image_set.load_mnist_format(images_filename, labels_filename);
 204   cout << " done.\n"; cout.flush();
 205
 206   cout << "Database contains " << image_set.nb_pics()
 207        << " images of resolution " << image_set.width() << "x" << image_set.height()
 208        << " divided into " << image_set.nb_obj() << " objects.\n";
 209
 210   srand48(nb_experiment);
 211
 212   int nb_layers = 0;
 213   int *layer_sizes = 0;
 214
 215   if(opt_layer_sizes[0]) {
 216     char *s = opt_layer_sizes;
 217     char token[SMALL_BUFFER_SIZE];
 218     while(s) { s = next_word(token, s, SMALL_BUFFER_SIZE); nb_layers++; }
 219
 220     if(nb_layers < 2) {
 221       cerr << "Need at least two layers.\n";
 222       exit(1);
 223     }
 224
 225     layer_sizes = new int[nb_layers];
 226     s = opt_layer_sizes;
 227     int n = 0;
 228     while(s) { s = next_word(token, s, SMALL_BUFFER_SIZE); layer_sizes[n++] = atoi(token); }
 229   }
 230
 231   // Loading or creating a perceptron from scratch /////////////////////
 232
 233   MultiLayerPerceptron *mlp = 0;
 234
 235   if(opt_load_filename[0]) {
 236
 237     ifstream stream(opt_load_filename);
 238     if(stream.fail()) {
 239       cerr << "Can not read " << opt_load_filename << ".\n";
 240       exit(1);
 241     }
 242
 243     cout << "Loading network " << opt_load_filename << " ... "; cout.flush();
 244     mlp = new MultiLayerPerceptron(stream);
 245     cout << "done (layers of sizes";
 246     for(int l = 0; l < mlp->nb_layers(); l++) cout << " " << mlp->layer_size(l);
 247     cout << ")\n"; cout.flush();
 248
 249   } else if(nb_layers > 0) {
 250
 251     if(layer_sizes[0] != image_set.width() * image_set.height() ||
 252        layer_sizes[nb_layers-1] != image_set.nb_obj()) {
 253       cerr << "For this data set, the input layer has to be of size " << image_set.width() * image_set.height() << ",\n";
 254       cerr << "and the output has to be of size " << image_set.nb_obj() << ".\n";
 255       exit(1);
 256     }
 257
 258     cout << "Creating a new network (layers of sizes";
 259     for(int i = 0; i < nb_layers; i++) cout << " " << layer_sizes[i];
 260     cout << ").\n";
 261
 262     mlp = new MultiLayerPerceptron(nb_layers, layer_sizes);
 263     mlp->init_random_weights(1e-1);
 264   }
 265
 266   // Training the perceptron ///////////////////////////////////////////
 267
 268   ImageSet training_set, validation_set, test_set;
 269
 270   if(nb_training_examples > 0)
 271     training_set.sample_among_unused_pictures(image_set, nb_training_examples);
 272
 273   if(nb_validation_examples > 0)
 274     validation_set.sample_among_unused_pictures(image_set, nb_validation_examples);
 275
 276   if(save_data && mlp) mlp->save_data();
 277
 278   if(nb_training_examples > 0) {
 279     if(validation_set.nb_pics() == 0) {
 280       cerr << "We need validation pictures for training.\n";
 281       exit(1);
 282     }
 283     cout << "Training the network with " << nb_training_examples << " training and " << nb_validation_examples << " validation examples.\n"; cout.flush();
 284     mlp->train(&training_set, &validation_set);
 285   }
 286
 287   // Saving the perceptron /////////////////////////////////////////////
 288
 289   if(opt_save_filename[0]) {
 290     if(!mlp) {
 291       cerr << "No perceptron to save.\n";
 292       exit(1);
 293     }
 294
 295     ofstream stream(opt_save_filename);
 296     if(stream.fail()) {
 297       cerr << "Can not write " << opt_save_filename << ".\n";
 298       exit(1);
 299     }
 300
 301     cout << "Saving network " << opt_save_filename << " ... "; cout.flush();
 302     mlp->save(stream);
 303     cout << "done.\n"; cout.flush();
 304   }
 305
 306   // Testing the perceptron ////////////////////////////////////////////
 307
 308   if(nb_test_examples > 0) {
 309     test_set.sample_among_unused_pictures(image_set, nb_test_examples);
 310     cout << "Error rate " << mlp->error(&test_set) << " (" << mlp->classification_error(&test_set)*100 << "%)\n";
 311
 312     // This is to test the analytical gradient
 313     //     scalar_t gradient[mlp->nb_weights()], numerical_gradient[mlp->nb_weights()];
 314     //     mlp->compute_gradient(&test_set, gradient);
 315     //     mlp->compute_numerical_gradient(&test_set, numerical_gradient);
 316     //     for(int i = 0; i < mlp->nb_weights(); i++) cout << "TEST " << gradient[i] << " " << numerical_gradient[i] << "\n";
 317   }
 318
 319   // Flushing the log //////////////////////////////////////////////////
 320
 321   delete[] layer_sizes;
 322 }