Commit 4eac668051893fcef90749f43f9deebadda8c57a
1 parent defa5614
Exists in master and in 1 other branch

3-conv-layer to 2-conv-layer model.

Showing 2 changed files with 248 additions and 44 deletions
mmodel/theano/THEANO.py
@@ -39,18 +39,17 @@ class ModelTHEANO(ModelBase):
 
     def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'mnist.pkl.gz'),
                    learning_rate=0.1, n_epochs=200,
-                   nkerns=[20, 50, 50],
+                   nkerns=[20, 50],
                    batch_size=400):
 
-        # return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
-        #                          batch_size=batch_size)
+        # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+        #                            batch_size=batch_size)
 
-        with gzip.open(dataset, 'rb') as f:
-            train_set, valid_set, test_set = cPickle.load(f)
+        X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
 
-        train_set_x, train_set_y = shared_dataset(train_set)
-        valid_set_x, valid_set_y = shared_dataset(valid_set)
-        test_set_x, test_set_y = shared_dataset(test_set)
+        train_set_x, train_set_y = shared_dataset((X_train, Y_train))
+        valid_set_x, valid_set_y = train_set_x[:1000], train_set_y[:1000]
+        test_set_x, test_set_y = shared_dataset((X_test, Y_test))
 
         # compute number of minibatches for training, validation and testing
         n_train_batches = train_set_x.get_value(borrow=True).shape[0]
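The pickled-MNIST loader gives way to an in-memory 80/20 split of the caller's arrays, with the validation set taken as the first 1000 training rows rather than a true hold-out. A minimal sketch of the same pattern, assuming NumPy arrays X, Y and the old sklearn cross_validation module (renamed model_selection in later sklearn releases); shared_dataset is the helper from mmodel/theano/theanoutil.py, reproduced here in tutorial form so the sketch is self-contained:

```python
import numpy as np
import theano
import theano.tensor as T
from sklearn import cross_validation  # sklearn.model_selection in later releases

def shared_dataset(data_xy, borrow=True):
    # wrap arrays as Theano shared variables, labels cast to int32,
    # as in mmodel/theano/theanoutil.py
    data_x, data_y = data_xy
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')

# hypothetical arrays standing in for the caller's X, Y
X = np.random.rand(200, 304 * 304)
Y = np.random.randint(0, 2, size=200)

X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
    X, Y, test_size=0.2, random_state=0)
train_set_x, train_set_y = shared_dataset((X_train, Y_train))
test_set_x, test_set_y = shared_dataset((X_test, Y_test))
# the validation set is the first 1000 training rows, not a separate hold-out
valid_set_x, valid_set_y = train_set_x[:1000], train_set_y[:1000]
```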
@@ -60,44 +59,44 @@ class ModelTHEANO(ModelBase):
         n_valid_batches /= batch_size
         n_test_batches /= batch_size
 
-        print train_set_x.get_value(borrow=True).shape, train_set_y.get_value(borrow=True).shape
+        print train_set_x.get_value(borrow=True).shape, train_set_y.shape
 
         rng = np.random.RandomState(12306)
         index = T.lscalar()  # index to a [mini]batch
         # start-snippet-1
-        x = T.matrix('x')   # the data is presented as rasterized images
+        x = T.matrix('x')    # the data is presented as rasterized images
         y = T.ivector('y')  # the labels are presented as 1D vector of
-                            # [int] labels
+                             # [int] labels
 
         ######################
         # BUILD ACTUAL MODEL #
         ######################
         print '... building the model'
 
-        layer0_input = x.reshape((batch_size, 1, 28, 28))
+        layer0_input = x.reshape((batch_size, 1, 304, 304))
 
         # Construct the first convolutional pooling layer:
-        # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
-        # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
-        # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
+        # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297)
+        # maxpooling reduces this further to (297/2, 297/2) = (148, 148)
+        # 4D output tensor is thus of shape (batch_size, nkerns[0], 148, 148)
         layer0 = ConvPoolLayer(
             rng,
             input=layer0_input,
-            image_shape=(batch_size, 1, 28, 28),
-            filter_shape=(nkerns[0], 1, 5, 5),
+            image_shape=(batch_size, 1, 304, 304),
+            filter_shape=(nkerns[0], 1, 8, 8),
             poolsize=(2, 2)
         )
 
         # Construct the second convolutional pooling layer
-        # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
-        # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
-        # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
+        # filtering reduces the image size to (148-5+1, 148-5+1) = (144, 144)
+        # maxpooling reduces this further to (144/4, 144/4) = (38, 38)
+        # 4D output tensor is thus of shape (batch_size, nkerns[1], 38, 38)
         layer1 = ConvPoolLayer(
             rng,
             input=layer0.output,
-            image_shape=(batch_size, nkerns[0], 12, 12),
+            image_shape=(batch_size, nkerns[0], 148, 148),
             filter_shape=(nkerns[1], nkerns[0], 5, 5),
-            poolsize=(2, 2)
+            poolsize=(4, 4)
         )
 
         # the HiddenLayer being fully-connected, it operates on 2D matrices of
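The new size comments follow the usual valid-convolution / max-pooling arithmetic, which can be rechecked mechanically. A sketch of that check, assuming a tutorial-style ConvPoolLayer (valid convolution, then max_pool_2d with ignore_border=True, i.e. floor division). Under these assumptions layer0 comes out at 148 x 148 as the comment says, while layer1 comes out at 36 x 36 rather than the 38 x 38 used in the comments and the later n_in; the exact figure depends on how this repo's ConvPoolLayer pads and pools:

```python
def conv_pool_out(size, filt, pool):
    # 'valid' convolution shrinks by filt - 1; pooling floor-divides
    return (size - filt + 1) // pool

s0 = conv_pool_out(304, 8, 2)  # (304 - 8 + 1) // 2 = 148
s1 = conv_pool_out(s0, 5, 4)   # (148 - 5 + 1) // 4 = 36
print s0, s1
```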
@@ -110,13 +109,13 @@ class ModelTHEANO(ModelBase):
         layer2 = HiddenLayer(
             rng,
             input=layer2_input,
-            n_in=nkerns[1] * 4 * 4,
+            n_in=nkerns[1] * 38 * 38,
             n_out=500,
             activation=T.tanh
         )
 
         # classify the values of the fully-connected sigmoidal layer
-        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=2)
 
         # the cost we minimize during training is the NLL of the model
         cost = layer3.negative_log_likelihood(y)
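Only the head changes here: a 10-way MNIST softmax becomes a 2-way classifier. The cost is the mean negative log-likelihood of the true class; a sketch of how the deeplearning.net-style LogisticRegression this code mirrors computes it, with p_y_given_x standing for the softmax output matrix:

```python
import theano.tensor as T

def negative_log_likelihood(p_y_given_x, y):
    # pick out log P(y_i | x_i) for each row i, average over the minibatch
    return -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
```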
@@ -155,7 +154,11 @@ class ModelTHEANO(ModelBase):
             (param_i, param_i - learning_rate * grad_i)
             for param_i, grad_i in zip(params, grads)
         ]
-
+        """
+        Total Parameters:
+        >>> 20 * 64 + 1000 * 25 + 50 * 38 * 38 * 500 + 500 * 2
+        36127280
+        """
        train_model = theano.function(
             [index],
             cost,
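The new docstring counts weights only (biases omitted). The same tally with the terms spelled out, matching the docstring's 36,127,280:

```python
layer0_w = 20 * (1 * 8 * 8)      # 20 conv filters of 1x8x8      =     1280
layer1_w = 50 * (20 * 5 * 5)     # 50 conv filters of 20x5x5     =    25000
layer2_w = 50 * 38 * 38 * 500    # flattened maps -> 500 hidden  = 36100000
layer3_w = 500 * 2               # 500 hidden -> 2-way softmax   =     1000
total = layer0_w + layer1_w + layer2_w + layer3_w
assert total == 36127280
print total  # the fully-connected layer holds ~99.9% of all weights
```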
@@ -174,14 +177,14 @@ class ModelTHEANO(ModelBase):
         # early-stopping parameters
         patience = 10000  # look as this many examples regardless
         patience_increase = 2  # wait this much longer when a new best is
-                               # found
+                                # found
         improvement_threshold = 0.995  # a relative improvement of this much is
-                                       # considered significant
+                                        # considered significant
         validation_frequency = min(n_train_batches, patience / 2)
-                                      # go through this many
-                                      # minibatche before checking the network
-                                      # on the validation set; in this case we
-                                      # check every epoch
+                                  # go through this many
+                                  # minibatche before checking the network
+                                  # on the validation set; in this case we
+                                  # check every epoch
 
         best_validation_loss = np.inf
         best_iter = 0
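This hunk only re-indents the comments, but they describe the early-stopping scheme the loop below implements: patience is a floor on the number of iterations, and each sufficiently large validation improvement raises it. A condensed sketch of the control flow, with validate() as a hypothetical stub for the real validation pass:

```python
import numpy as np

patience = 10000               # look at this many minibatches regardless
patience_increase = 2          # extension factor on a significant new best
improvement_threshold = 0.995  # relative improvement that counts as significant
best_loss = np.inf

for it in xrange(1000000):
    loss = validate()  # hypothetical stub: returns current validation loss
    if loss < best_loss:
        if loss < best_loss * improvement_threshold:
            # a significant improvement buys more patience
            patience = max(patience, it * patience_increase)
        best_loss = loss
    if patience <= it:
        break  # patience exhausted: stop training
```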
@@ -197,8 +200,8 @@ class ModelTHEANO(ModelBase):
 
                 iter = (epoch - 1) * n_train_batches + minibatch_index
 
-                if iter % 100 == 0:
-                    print 'training @ iter = ', iter
+                # if iter % 100 == 0:
+                print 'training @ iter = ', iter
                 cost_ij = train_model(minibatch_index)
 
                 if (iter + 1) % validation_frequency == 0:
@@ -215,8 +218,8 @@ class ModelTHEANO(ModelBase):
                     if this_validation_loss < best_validation_loss:
 
                         #improve patience if loss improvement is good enough
-                        if this_validation_loss < best_validation_loss * \
-                           improvement_threshold:
+                        if this_validation_loss < best_validation_loss * \
+                                improvement_threshold:
                             patience = max(patience, iter * patience_increase)
 
                         # save best validation score and iteration number
@@ -248,14 +251,6 @@ class ModelTHEANO(ModelBase):
               ' ran for %.2fm' % ((end_time - start_time) / 60.))
 
 
-
-
-
-
-
-
-
-
     def train(self, X, Y):
         if self.toolset == 'cnn':
             return self._train_cnn(X, Y)
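With the blank-line cleanup, the public entry point is unchanged. A hypothetical call, assuming ModelTHEANO takes a toolset argument (its constructor is not shown in this diff) and that X, Y are the rasterized 304x304 images and binary labels the new code expects:

```python
from mmodel.theano.THEANO import ModelTHEANO

model = ModelTHEANO(toolset='cnn')  # constructor signature assumed, not in this diff
model.train(X, Y)                   # X: (n, 304*304) rasterized images, Y: 0/1 labels
```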
mmodel/theano/theanoutil.py
@@ -11,6 +11,7 @@ import theano.tensor as T
 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv
 
+import gzip
 import cPickle
 
 
@@ -193,7 +194,7 @@ def shared_dataset(data_xy, borrow=True):
     # lets ous get around this issue
     return shared_x, T.cast(shared_y, 'int32')
 
-def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
+def example_cnn_ilscrop(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
                        learning_rate=0.1, n_epochs=200,
                        nkerns=[20, 50, 50],
                        batch_size=400):
@@ -388,7 +389,215 @@ def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
 
 
 
+def example_cnn_mnist(self, X=None, Y=None, dataset=os.path.join('', '../../res/', 'mnist.pkl.gz'),
+                      learning_rate=0.1, n_epochs=200,
+                      nkerns=[20, 50],
+                      batch_size=500):
+
+    # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+    #                            batch_size=batch_size)
+
+    with gzip.open(dataset, 'rb') as f:
+        train_set, valid_set, test_set = cPickle.load(f)
+
+    train_set_x, train_set_y = shared_dataset(train_set)
+    valid_set_x, valid_set_y = shared_dataset(valid_set)
+    test_set_x, test_set_y = shared_dataset(test_set)
+
+    # compute number of minibatches for training, validation and testing
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
+    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
+    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
+    n_train_batches /= batch_size
+    n_valid_batches /= batch_size
+    n_test_batches /= batch_size
+
+    print train_set_x.get_value(borrow=True).shape, train_set_y.shape
+
+    rng = np.random.RandomState(12306)
+    index = T.lscalar()  # index to a [mini]batch
+    # start-snippet-1
+    x = T.matrix('x')   # the data is presented as rasterized images
+    y = T.ivector('y')  # the labels are presented as 1D vector of
+                        # [int] labels
+
+    ######################
+    # BUILD ACTUAL MODEL #
+    ######################
+    print '... building the model'
+
+    layer0_input = x.reshape((batch_size, 1, 28, 28))
+
+    # Construct the first convolutional pooling layer:
+    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
+    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
+    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
+    layer0 = ConvPoolLayer(
+        rng,
+        input=layer0_input,
+        image_shape=(batch_size, 1, 28, 28),
+        filter_shape=(nkerns[0], 1, 5, 5),
+        poolsize=(2, 2)
+    )
+
+    # Construct the second convolutional pooling layer
+    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
+    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
+    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
+    layer1 = ConvPoolLayer(
+        rng,
+        input=layer0.output,
+        image_shape=(batch_size, nkerns[0], 12, 12),
+        filter_shape=(nkerns[1], nkerns[0], 5, 5),
+        poolsize=(2, 2)
+    )
+
+    # the HiddenLayer being fully-connected, it operates on 2D matrices of
+    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
+    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
+    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
+    layer2_input = layer1.output.flatten(2)
+
+    # construct a fully-connected sigmoidal layer
+    layer2 = HiddenLayer(
+        rng,
+        input=layer2_input,
+        n_in=nkerns[1] * 4 * 4,
+        n_out=500,
+        activation=T.tanh
+    )
+
+    # classify the values of the fully-connected sigmoidal layer
+    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+
+    # the cost we minimize during training is the NLL of the model
+    cost = layer3.negative_log_likelihood(y)
+
+    # create a function to compute the mistakes that are made by the model
+    test_model = theano.function(
+        [index],
+        layer3.errors(y),
+        givens={
+            x: test_set_x[index * batch_size: (index + 1) * batch_size],
+            y: test_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    validate_model = theano.function(
+        [index],
+        layer3.errors(y),
+        givens={
+            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
+            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    # create a list of all model parameters to be fit by gradient descent
+    params = layer3.params + layer2.params + layer1.params + layer0.params
+
+    # create a list of gradients for all model parameters
+    grads = T.grad(cost, params)
+
+    # train_model is a function that updates the model parameters by
+    # SGD Since this model has many parameters, it would be tedious to
+    # manually create an update rule for each model parameter. We thus
+    # create the updates list by automatically looping over all
+    # (params[i], grads[i]) pairs.
+    updates = [
+        (param_i, param_i - learning_rate * grad_i)
+        for param_i, grad_i in zip(params, grads)
+    ]
+
+    train_model = theano.function(
+        [index],
+        cost,
+        updates=updates,
+        givens={
+            x: train_set_x[index * batch_size: (index + 1) * batch_size],
+            y: train_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+    # end-snippet-1
+
+    ###############
+    # TRAIN MODEL #
+    ###############
+    print '... training'
+    # early-stopping parameters
+    patience = 10000  # look as this many examples regardless
+    patience_increase = 2  # wait this much longer when a new best is
+                           # found
+    improvement_threshold = 0.995  # a relative improvement of this much is
+                                   # considered significant
+    validation_frequency = min(n_train_batches, patience / 2)
+                              # go through this many
+                              # minibatche before checking the network
+                              # on the validation set; in this case we
+                              # check every epoch
+
+    best_validation_loss = np.inf
+    best_iter = 0
+    test_score = 0.
+    start_time = time.clock()
+
+    epoch = 0
+    done_looping = False
+
+    while (epoch < n_epochs) and (not done_looping):
+        epoch = epoch + 1
+        for minibatch_index in xrange(n_train_batches):
+
+            iter = (epoch - 1) * n_train_batches + minibatch_index
+
+            if iter % 100 == 0:
+                print 'training @ iter = ', iter
+            cost_ij = train_model(minibatch_index)
 
+            if (iter + 1) % validation_frequency == 0:
+
+                # compute zero-one loss on validation set
+                validation_losses = [validate_model(i) for i
+                                     in xrange(n_valid_batches)]
+                this_validation_loss = np.mean(validation_losses)
+                print('epoch %i, minibatch %i/%i, validation error %f %%' %
+                      (epoch, minibatch_index + 1, n_train_batches,
+                       this_validation_loss * 100.))
+
+                # if we got the best validation score until now
+                if this_validation_loss < best_validation_loss:
+
+                    #improve patience if loss improvement is good enough
+                    if this_validation_loss < best_validation_loss * \
+                       improvement_threshold:
+                        patience = max(patience, iter * patience_increase)
+
+                    # save best validation score and iteration number
+                    best_validation_loss = this_validation_loss
+                    best_iter = iter
+
+                    # test it on the test set
+                    test_losses = [
+                        test_model(i)
+                        for i in xrange(n_test_batches)
+                    ]
+                    test_score = np.mean(test_losses)
+                    print(('     epoch %i, minibatch %i/%i, test error of '
+                           'best model %f %%') %
+                          (epoch, minibatch_index + 1, n_train_batches,
+                           test_score * 100.))
+
+            if patience <= iter:
+                done_looping = True
+                break
+
+    end_time = time.clock()
+    print('Optimization complete.')
+    print('Best validation score of %f %% obtained at iteration %i, '
+          'with test performance %f %%' %
+          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
+    print >> sys.stderr, ('The code for file ' +
+                          os.path.split(__file__)[1] +
+                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
 
 
 
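A hypothetical invocation of the relocated MNIST example. Note the signature keeps a stray self parameter (it reads like a method lifted to module level), so a placeholder first argument is needed, and mnist.pkl.gz must exist at the default relative path or be passed explicitly:

```python
from mmodel.theano.theanoutil import example_cnn_mnist

# first argument fills the unused 'self' slot; the path is an assumption
example_cnn_mnist(None, dataset='../../res/mnist.pkl.gz', n_epochs=5)
```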