Commit defa5614493b6f2be2564623fc28f707d46ee1e8

Authored by Chunk
1 parent 3ef6ddf1
Exists in master and in 1 other branch: refactor

mnist re-testing...

mmodel/theano/THEANO.py
@@ -37,38 +37,223 @@ class ModelTHEANO(ModelBase):
         self.sparker = sc
         self.model = None

-    def _shared_dataset(self, data_xy, borrow=True):
-        """ Function that loads the dataset into shared variables
-
-        The reason we store our dataset in shared variables is to allow
-        Theano to copy it into the GPU memory (when code is run on GPU).
-        Since copying data into the GPU is slow, copying a minibatch everytime
-        is needed (the default behaviour if the data is not in a shared
-        variable) would lead to a large decrease in performance.
-        """
-        data_x, data_y = data_xy
-        shared_x = theano.shared(np.asarray(data_x,
-                                            dtype=theano.config.floatX),
-                                 borrow=borrow)
-        shared_y = theano.shared(np.asarray(data_y,
-                                            dtype=theano.config.floatX),
-                                 borrow=borrow)
-        # When storing data on the GPU it has to be stored as floats
-        # therefore we will store the labels as ``floatX`` as well
-        # (``shared_y`` does exactly that). But during our computations
-        # we need them as ints (we use labels as index, and if they are
-        # floats it doesn't make sense) therefore instead of returning
-        # ``shared_y`` we will have to cast it to int. This little hack
-        # lets ous get around this issue
-        return shared_x, T.cast(shared_y, 'int32')
-
-    def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'ils_crop.pkl'),
+    def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'mnist.pkl.gz'),
                    learning_rate=0.1, n_epochs=200,
                    nkerns=[20, 50, 50],
                    batch_size=400):

-        return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
-                                 batch_size=batch_size)
+        # return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+        #                          batch_size=batch_size)
+
+        with gzip.open(dataset, 'rb') as f:
+            train_set, valid_set, test_set = cPickle.load(f)
+
+        train_set_x, train_set_y = shared_dataset(train_set)
+        valid_set_x, valid_set_y = shared_dataset(valid_set)
+        test_set_x, test_set_y = shared_dataset(test_set)
+
+        # compute number of minibatches for training, validation and testing
+        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
+        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
+        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
+        n_train_batches /= batch_size
+        n_valid_batches /= batch_size
+        n_test_batches /= batch_size
+
+        print train_set_x.get_value(borrow=True).shape, train_set_y.get_value(borrow=True).shape
+
+        rng = np.random.RandomState(12306)
+        index = T.lscalar()  # index to a [mini]batch
+        # start-snippet-1
+        x = T.matrix('x')   # the data is presented as rasterized images
+        y = T.ivector('y')  # the labels are presented as 1D vector of
+                            # [int] labels
+
+        ######################
+        # BUILD ACTUAL MODEL #
+        ######################
+        print '... building the model'
+
+        layer0_input = x.reshape((batch_size, 1, 28, 28))
+
+        # Construct the first convolutional pooling layer:
+        # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
+        # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
+        # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
+        layer0 = ConvPoolLayer(
+            rng,
+            input=layer0_input,
+            image_shape=(batch_size, 1, 28, 28),
+            filter_shape=(nkerns[0], 1, 5, 5),
+            poolsize=(2, 2)
+        )
+
+        # Construct the second convolutional pooling layer
+        # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
+        # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
+        # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
+        layer1 = ConvPoolLayer(
+            rng,
+            input=layer0.output,
+            image_shape=(batch_size, nkerns[0], 12, 12),
+            filter_shape=(nkerns[1], nkerns[0], 5, 5),
+            poolsize=(2, 2)
+        )
+
+        # the HiddenLayer being fully-connected, it operates on 2D matrices of
+        # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
+        # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
+        # or (400, 50 * 4 * 4) = (400, 800) with the default values here.
+        layer2_input = layer1.output.flatten(2)
+
+        # construct a fully-connected sigmoidal layer
+        layer2 = HiddenLayer(
+            rng,
+            input=layer2_input,
+            n_in=nkerns[1] * 4 * 4,
+            n_out=500,
+            activation=T.tanh
+        )
+
+        # classify the values of the fully-connected sigmoidal layer
+        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+
+        # the cost we minimize during training is the NLL of the model
+        cost = layer3.negative_log_likelihood(y)
+
+        # create a function to compute the mistakes that are made by the model
+        test_model = theano.function(
+            [index],
+            layer3.errors(y),
+            givens={
+                x: test_set_x[index * batch_size: (index + 1) * batch_size],
+                y: test_set_y[index * batch_size: (index + 1) * batch_size]
+            }
+        )
+
+        validate_model = theano.function(
+            [index],
+            layer3.errors(y),
+            givens={
+                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
+                y: valid_set_y[index * batch_size: (index + 1) * batch_size]
+            }
+        )
+
+        # create a list of all model parameters to be fit by gradient descent
+        params = layer3.params + layer2.params + layer1.params + layer0.params
+
+        # create a list of gradients for all model parameters
+        grads = T.grad(cost, params)
+
+        # train_model is a function that updates the model parameters by
+        # SGD Since this model has many parameters, it would be tedious to
+        # manually create an update rule for each model parameter. We thus
+        # create the updates list by automatically looping over all
+        # (params[i], grads[i]) pairs.
+        updates = [
+            (param_i, param_i - learning_rate * grad_i)
+            for param_i, grad_i in zip(params, grads)
+        ]
+
+        train_model = theano.function(
+            [index],
+            cost,
+            updates=updates,
+            givens={
+                x: train_set_x[index * batch_size: (index + 1) * batch_size],
+                y: train_set_y[index * batch_size: (index + 1) * batch_size]
+            }
+        )
+        # end-snippet-1
+
+        ###############
+        # TRAIN MODEL #
+        ###############
+        print '... training'
+        # early-stopping parameters
+        patience = 10000  # look as this many examples regardless
+        patience_increase = 2  # wait this much longer when a new best is
+                               # found
+        improvement_threshold = 0.995  # a relative improvement of this much is
+                                       # considered significant
+        validation_frequency = min(n_train_batches, patience / 2)
+                                   # go through this many
+                                   # minibatches before checking the network
+                                   # on the validation set; in this case we
+                                   # check every epoch
+
+        best_validation_loss = np.inf
+        best_iter = 0
+        test_score = 0.
+        start_time = time.clock()
+
+        epoch = 0
+        done_looping = False
+
+        while (epoch < n_epochs) and (not done_looping):
+            epoch = epoch + 1
+            for minibatch_index in xrange(n_train_batches):
+
+                iter = (epoch - 1) * n_train_batches + minibatch_index
+
+                if iter % 100 == 0:
+                    print 'training @ iter = ', iter
+                cost_ij = train_model(minibatch_index)
+
+                if (iter + 1) % validation_frequency == 0:
+
+                    # compute zero-one loss on validation set
+                    validation_losses = [validate_model(i) for i
+                                         in xrange(n_valid_batches)]
+                    this_validation_loss = np.mean(validation_losses)
+                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
+                          (epoch, minibatch_index + 1, n_train_batches,
+                           this_validation_loss * 100.))
+
+                    # if we got the best validation score until now
+                    if this_validation_loss < best_validation_loss:
+
+                        #improve patience if loss improvement is good enough
+                        if this_validation_loss < best_validation_loss * \
+                                improvement_threshold:
+                            patience = max(patience, iter * patience_increase)
+
+                        # save best validation score and iteration number
+                        best_validation_loss = this_validation_loss
+                        best_iter = iter
+
+                        # test it on the test set
+                        test_losses = [
+                            test_model(i)
+                            for i in xrange(n_test_batches)
+                        ]
+                        test_score = np.mean(test_losses)
+                        print((' epoch %i, minibatch %i/%i, test error of '
+                               'best model %f %%') %
+                              (epoch, minibatch_index + 1, n_train_batches,
+                               test_score * 100.))
+
+                if patience <= iter:
+                    done_looping = True
+                    break
+
+        end_time = time.clock()
+        print('Optimization complete.')
+        print('Best validation score of %f %% obtained at iteration %i, '
+              'with test performance %f %%' %
+              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
+        print >> sys.stderr, ('The code for file ' +
+                              os.path.split(__file__)[1] +
+                              ' ran for %.2fm' % ((end_time - start_time) / 60.))
+
+
+
+
+
+
+
+


     def train(self, X, Y):
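
Note: the rewritten _train_cnn expects dataset to point at the standard MNIST pickle, i.e. a gzip file that unpickles to three (data, label) pairs. A minimal sanity-check sketch (Python 2, to match the code above; the path below is a placeholder for wherever mnist.pkl.gz actually lives):

    import gzip
    import cPickle

    # Expected layout: ((train_x, train_y), (valid_x, valid_y), (test_x, test_y))
    with gzip.open('res/mnist.pkl.gz', 'rb') as f:   # placeholder path
        train_set, valid_set, test_set = cPickle.load(f)
    for name, (x, y) in zip(('train', 'valid', 'test'), (train_set, valid_set, test_set)):
        print name, x.shape, y.shape   # typically train (50000, 784) (50000,)
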
mmodel/theano/theanoutil.py
@@ -168,7 +168,7 @@ class ConvPoolLayer(object):
         self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
         self.params = [self.W, self.b]

-def _shared_dataset(data_xy, borrow=True):
+def shared_dataset(data_xy, borrow=True):
172 """ Function that loads the dataset into shared variables 172 """ Function that loads the dataset into shared variables
173 173
174 The reason we store our dataset in shared variables is to allow 174 The reason we store our dataset in shared variables is to allow
@@ -208,8 +208,8 @@ def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'il
     else:
         X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)

-    X_train, Y_train = _shared_dataset((X_train, Y_train))
-    X_test, Y_test = _shared_dataset((X_test, Y_test))
+    X_train, Y_train = shared_dataset((X_train, Y_train))
+    X_test, Y_test = shared_dataset((X_test, Y_test))

     # X_train = theano.shared(np.asarray(X_train, dtype=theano.config.floatX), borrow=True)
     # Y_train = theano.shared(np.asarray(Y_train, dtype=theano.config.floatX), borrow=True)
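
Note: dropping the leading underscore is what lets _train_cnn in THEANO.py call shared_dataset directly; only the name changes in this hunk. For reference, a sketch of the helper mirroring the method body removed from THEANO.py in this commit:

    import numpy as np
    import theano
    import theano.tensor as T

    def shared_dataset(data_xy, borrow=True):
        # Load a (data, labels) pair into Theano shared variables so the whole
        # set can sit in GPU memory instead of being copied per minibatch.
        data_x, data_y = data_xy
        shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
        # Labels must be stored as floatX on the GPU but are used as int indices,
        # hence the cast on the way out.
        return shared_x, T.cast(shared_y, 'int32')
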
test/test_model.py
@@ -149,6 +149,11 @@ def test_SVM_ILSVRC_S():
     # test_SVM_ILSVRC_SPARK()


+def test_THEANO_mnist():
+    mtheano = THEANO.ModelTHEANO(toolset='cnn')
+    mtheano._train_cnn(learning_rate=0.1, n_epochs=200, dataset=os.path.join(package_dir, '../res/', 'mnist.pkl.gz'), nkerns=[20, 50], batch_size=500)
+
+
 def test_THEANO_crop():
     timer.mark()
     dilc = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_crop_pil')
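
Note: with the parameters test_THEANO_mnist passes and the usual 50 000 / 10 000 / 10 000 MNIST split (an assumption about mnist.pkl.gz, not something this diff shows), the loop sizes in _train_cnn work out as follows:

    batch_size = 500                                  # value passed by the test
    n_train, n_valid, n_test = 50000, 10000, 10000    # assumed split sizes
    n_train_batches = n_train // batch_size           # 100 minibatches per epoch
    n_valid_batches = n_valid // batch_size           # 20
    n_test_batches = n_test // batch_size             # 20
    patience = 10000
    validation_frequency = min(n_train_batches, patience // 2)   # min(100, 5000) = 100

so the validation set is scored once per epoch, and early stopping cannot trigger before at least patience training iterations have run.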