Commit 4eac668051893fcef90749f43f9deebadda8c57a
Parent: defa5614
Exists in master and in 1 other branch.

3-conv-layer to 2-conv-layer model.

Showing 2 changed files with 248 additions and 44 deletions.
mmodel/theano/THEANO.py
@@ -39,18 +39,17 @@ class ModelTHEANO(ModelBase):
 
     def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'mnist.pkl.gz'),
                    learning_rate=0.1, n_epochs=200,
-                   nkerns=[20, 50, 50],
+                   nkerns=[20, 50],
                    batch_size=400):
 
-        # return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
-        #                          batch_size=batch_size)
+        # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+        #                            batch_size=batch_size)
 
-        with gzip.open(dataset, 'rb') as f:
-            train_set, valid_set, test_set = cPickle.load(f)
+        X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
 
-        train_set_x, train_set_y = shared_dataset(train_set)
-        valid_set_x, valid_set_y = shared_dataset(valid_set)
-        test_set_x, test_set_y = shared_dataset(test_set)
+        train_set_x, train_set_y = shared_dataset((X_train, Y_train))
+        valid_set_x, valid_set_y = train_set_x[:1000], train_set_y[:1000]
+        test_set_x, test_set_y = shared_dataset((X_test, Y_test))
 
         # compute number of minibatches for training, validation and testing
         n_train_batches = train_set_x.get_value(borrow=True).shape[0]
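Note on the new data path: the call to cross_validation.train_test_split assumes `from sklearn import cross_validation` is in scope in THEANO.py (scikit-learn renamed this module to model_selection in 0.18). A minimal sketch of what the replacement lines set up, with one Theano subtlety worth flagging:

    # Sketch only; names follow the diff, the sklearn import is an assumption.
    from sklearn import cross_validation  # scikit-learn < 0.18

    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
        X, Y, test_size=0.2, random_state=0)

    train_set_x, train_set_y = shared_dataset((X_train, Y_train))
    # Slicing a shared variable yields a *symbolic* subtensor: it is valid
    # inside theano.function givens, but it has no get_value() method.
    valid_set_x, valid_set_y = train_set_x[:1000], train_set_y[:1000]

If the surrounding (unchanged) code still sizes the validation batches via valid_set_x.get_value(borrow=True), as the MNIST variant later in this commit does, that call will fail on such a symbolic slice.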
@@ -60,44 +59,44 @@ class ModelTHEANO(ModelBase):
         n_valid_batches /= batch_size
         n_test_batches /= batch_size
 
-        print train_set_x.get_value(borrow=True).shape, train_set_y.get_value(borrow=True).shape
+        print train_set_x.get_value(borrow=True).shape, train_set_y.shape
 
         rng = np.random.RandomState(12306)
         index = T.lscalar()  # index to a [mini]batch
         # start-snippet-1
-        x = T.matrix('x')   # the data is presented as rasterized images
+        x = T.matrix('x')    # the data is presented as rasterized images
         y = T.ivector('y')  # the labels are presented as 1D vector of
-                            # [int] labels
+                             # [int] labels
 
         ######################
         # BUILD ACTUAL MODEL #
         ######################
         print '... building the model'
 
-        layer0_input = x.reshape((batch_size, 1, 28, 28))
+        layer0_input = x.reshape((batch_size, 1, 304, 304))
 
         # Construct the first convolutional pooling layer:
-        # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
-        # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
-        # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
+        # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297)
+        # maxpooling reduces this further to (297/2, 297/2) = (148, 148)
+        # 4D output tensor is thus of shape (batch_size, nkerns[0], 148, 148)
         layer0 = ConvPoolLayer(
             rng,
             input=layer0_input,
-            image_shape=(batch_size, 1, 28, 28),
-            filter_shape=(nkerns[0], 1, 5, 5),
+            image_shape=(batch_size, 1, 304, 304),
+            filter_shape=(nkerns[0], 1, 8, 8),
             poolsize=(2, 2)
         )
 
         # Construct the second convolutional pooling layer
-        # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
-        # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
-        # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
+        # filtering reduces the image size to (148-5+1, 148-5+1) = (144, 144)
+        # maxpooling reduces this further to (144/4, 144/4) = (38, 38)
+        # 4D output tensor is thus of shape (batch_size, nkerns[1], 38, 38)
         layer1 = ConvPoolLayer(
             rng,
            input=layer0.output,
-            image_shape=(batch_size, nkerns[0], 12, 12),
+            image_shape=(batch_size, nkerns[0], 148, 148),
             filter_shape=(nkerns[1], nkerns[0], 5, 5),
-            poolsize=(2, 2)
+            poolsize=(4, 4)
         )
 
         # the HiddenLayer being fully-connected, it operates on 2D matrices of
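The new size comments can be sanity-checked with integer arithmetic; this sketch assumes ConvPoolLayer performs a 'valid' convolution followed by non-overlapping max pooling with ignore_border=True, as in the Theano tutorial code it mirrors. The first stage matches the commented 148, but the second comes out to 36 rather than 38:

    def conv_pool_out(size, filt, pool):
        # 'valid' convolution, then non-overlapping pooling (ignore_border=True)
        return (size - filt + 1) // pool

    s0 = conv_pool_out(304, 8, 2)  # (304 - 8 + 1) // 2 = 148, as commented
    s1 = conv_pool_out(s0, 5, 4)   # (148 - 5 + 1) // 4 = 36, not 38
    print(s1)                      # 36

If 36 is the true pooled size, the hidden layer's n_in in the next hunk would need to be nkerns[1] * 36 * 36 for the shapes to line up.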
@@ -110,13 +109,13 @@ class ModelTHEANO(ModelBase):
         layer2 = HiddenLayer(
             rng,
             input=layer2_input,
-            n_in=nkerns[1] * 4 * 4,
+            n_in=nkerns[1] * 38 * 38,
             n_out=500,
             activation=T.tanh
         )
 
         # classify the values of the fully-connected sigmoidal layer
-        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=2)
 
         # the cost we minimize during training is the NLL of the model
         cost = layer3.negative_log_likelihood(y)
@@ -155,7 +154,11 @@ class ModelTHEANO(ModelBase):
             (param_i, param_i - learning_rate * grad_i)
             for param_i, grad_i in zip(params, grads)
         ]
-
+        """
+        Total Parameters:
+        >>> 20 * 64 + 1000 * 25 + 50 * 38 * 38 * 500 + 500 * 2
+        36127280
+        """
         train_model = theano.function(
             [index],
             cost,
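The triple-quoted string added above is a bare expression, so it acts as a comment; its arithmetic agrees with the shapes used in the layer comments (weights only, biases omitted):

    layer0_w = 20 * 1 * 8 * 8      # 1,280       conv1 filters (nkerns[0], 1, 8, 8)
    layer1_w = 50 * 20 * 5 * 5     # 25,000      conv2 filters (nkerns[1], nkerns[0], 5, 5)
    layer2_w = 50 * 38 * 38 * 500  # 36,100,000  hidden layer on flattened 38x38 maps
    layer3_w = 500 * 2             # 1,000       logistic regression weights
    print(layer0_w + layer1_w + layer2_w + layer3_w)  # 36127280

Nearly all of the ~36M parameters sit in the fully-connected layer; that is the usual cost of flattening large feature maps.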
@@ -174,14 +177,14 @@ class ModelTHEANO(ModelBase):
         # early-stopping parameters
         patience = 10000  # look as this many examples regardless
         patience_increase = 2  # wait this much longer when a new best is
-                           # found
+                               # found
         improvement_threshold = 0.995  # a relative improvement of this much is
-                                   # considered significant
+                                       # considered significant
         validation_frequency = min(n_train_batches, patience / 2)
-                              # go through this many
-                              # minibatche before checking the network
-                              # on the validation set; in this case we
-                              # check every epoch
+                                  # go through this many
+                                  # minibatche before checking the network
+                                  # on the validation set; in this case we
+                                  # check every epoch
 
         best_validation_loss = np.inf
         best_iter = 0
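For reference, the patience scheme these realigned comments describe, reduced to a standalone sketch (loss_stream is a hypothetical iterable of per-validation losses):

    patience = 10000               # look at this many iterations regardless
    patience_increase = 2          # extend patience on a significant new best
    improvement_threshold = 0.995  # significant = at least 0.5% relative gain

    best_loss = float('inf')
    for it, loss in enumerate(loss_stream):  # loss_stream is hypothetical
        if loss < best_loss:
            if loss < best_loss * improvement_threshold:
                patience = max(patience, it * patience_increase)
            best_loss = loss
        if patience <= it:
            break  # out of patience: stop training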
@@ -197,8 +200,8 @@ class ModelTHEANO(ModelBase):
 
                 iter = (epoch - 1) * n_train_batches + minibatch_index
 
-                if iter % 100 == 0:
-                    print 'training @ iter = ', iter
+                # if iter % 100 == 0:
+                print 'training @ iter = ', iter
                 cost_ij = train_model(minibatch_index)
 
                 if (iter + 1) % validation_frequency == 0:
@@ -215,8 +218,8 @@ class ModelTHEANO(ModelBase):
                     if this_validation_loss < best_validation_loss:
 
                         #improve patience if loss improvement is good enough
-                        if this_validation_loss < best_validation_loss * \
-                        improvement_threshold:
+                        if this_validation_loss < best_validation_loss * \
+                           improvement_threshold:
                             patience = max(patience, iter * patience_increase)
 
                         # save best validation score and iteration number
@@ -248,14 +251,6 @@ class ModelTHEANO(ModelBase):
               ' ran for %.2fm' % ((end_time - start_time) / 60.))
 
 
-
-
-
-
-
-
-
-
     def train(self, X, Y):
         if self.toolset == 'cnn':
             return self._train_cnn(X, Y)
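For context, the public entry point above is unchanged; a hypothetical call looks like the following (the constructor's toolset argument is inferred from self.toolset and is not shown in this diff):

    model = ModelTHEANO(toolset='cnn')  # hypothetical constructor call
    model.train(X, Y)                   # dispatches to _train_cnn(X, Y)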
mmodel/theano/theanoutil.py
@@ -11,6 +11,7 @@ import theano.tensor as T
 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv
 
+import gzip
 import cPickle
 
 
@@ -193,7 +194,7 @@ def shared_dataset(data_xy, borrow=True):
     # lets ous get around this issue
     return shared_x, T.cast(shared_y, 'int32')
 
-def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
+def example_cnn_ilscrop(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
                         learning_rate=0.1, n_epochs=200,
                         nkerns=[20, 50, 50],
                         batch_size=400):
@@ -388,7 +389,215 @@ def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'il
 
 
 
+def example_cnn_mnist(self, X=None, Y=None, dataset=os.path.join('', '../../res/', 'mnist.pkl.gz'),
+                      learning_rate=0.1, n_epochs=200,
+                      nkerns=[20, 50],
+                      batch_size=500):
+
+    # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+    #                            batch_size=batch_size)
+
+    with gzip.open(dataset, 'rb') as f:
+        train_set, valid_set, test_set = cPickle.load(f)
+
+    train_set_x, train_set_y = shared_dataset(train_set)
+    valid_set_x, valid_set_y = shared_dataset(valid_set)
+    test_set_x, test_set_y = shared_dataset(test_set)
+
+    # compute number of minibatches for training, validation and testing
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
+    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
+    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
+    n_train_batches /= batch_size
+    n_valid_batches /= batch_size
+    n_test_batches /= batch_size
+
+    print train_set_x.get_value(borrow=True).shape, train_set_y.shape
+
+    rng = np.random.RandomState(12306)
+    index = T.lscalar()  # index to a [mini]batch
+    # start-snippet-1
+    x = T.matrix('x')   # the data is presented as rasterized images
+    y = T.ivector('y')  # the labels are presented as 1D vector of
+                        # [int] labels
+
+    ######################
+    # BUILD ACTUAL MODEL #
+    ######################
+    print '... building the model'
+
+    layer0_input = x.reshape((batch_size, 1, 28, 28))
+
+    # Construct the first convolutional pooling layer:
+    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
+    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
+    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
+    layer0 = ConvPoolLayer(
+        rng,
+        input=layer0_input,
+        image_shape=(batch_size, 1, 28, 28),
+        filter_shape=(nkerns[0], 1, 5, 5),
+        poolsize=(2, 2)
+    )
+
+    # Construct the second convolutional pooling layer
+    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
+    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
+    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
+    layer1 = ConvPoolLayer(
+        rng,
+        input=layer0.output,
+        image_shape=(batch_size, nkerns[0], 12, 12),
+        filter_shape=(nkerns[1], nkerns[0], 5, 5),
+        poolsize=(2, 2)
+    )
+
+    # the HiddenLayer being fully-connected, it operates on 2D matrices of
+    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
+    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
+    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
+    layer2_input = layer1.output.flatten(2)
+
+    # construct a fully-connected sigmoidal layer
+    layer2 = HiddenLayer(
+        rng,
+        input=layer2_input,
+        n_in=nkerns[1] * 4 * 4,
+        n_out=500,
+        activation=T.tanh
+    )
+
+    # classify the values of the fully-connected sigmoidal layer
+    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+
+    # the cost we minimize during training is the NLL of the model
+    cost = layer3.negative_log_likelihood(y)
+
+    # create a function to compute the mistakes that are made by the model
+    test_model = theano.function(
+        [index],
+        layer3.errors(y),
+        givens={
+            x: test_set_x[index * batch_size: (index + 1) * batch_size],
+            y: test_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    validate_model = theano.function(
+        [index],
+        layer3.errors(y),
+        givens={
+            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
+            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    # create a list of all model parameters to be fit by gradient descent
+    params = layer3.params + layer2.params + layer1.params + layer0.params
+
+    # create a list of gradients for all model parameters
+    grads = T.grad(cost, params)
+
+    # train_model is a function that updates the model parameters by
+    # SGD Since this model has many parameters, it would be tedious to
+    # manually create an update rule for each model parameter. We thus
+    # create the updates list by automatically looping over all
+    # (params[i], grads[i]) pairs.
+    updates = [
+        (param_i, param_i - learning_rate * grad_i)
+        for param_i, grad_i in zip(params, grads)
+    ]
+
+    train_model = theano.function(
+        [index],
+        cost,
+        updates=updates,
+        givens={
+            x: train_set_x[index * batch_size: (index + 1) * batch_size],
+            y: train_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+    # end-snippet-1
+
+    ###############
+    # TRAIN MODEL #
+    ###############
+    print '... training'
+    # early-stopping parameters
+    patience = 10000  # look as this many examples regardless
+    patience_increase = 2  # wait this much longer when a new best is
+                           # found
+    improvement_threshold = 0.995  # a relative improvement of this much is
+                                   # considered significant
+    validation_frequency = min(n_train_batches, patience / 2)
+                              # go through this many
+                              # minibatche before checking the network
+                              # on the validation set; in this case we
+                              # check every epoch
+
+    best_validation_loss = np.inf
+    best_iter = 0
+    test_score = 0.
+    start_time = time.clock()
+
+    epoch = 0
+    done_looping = False
+
+    while (epoch < n_epochs) and (not done_looping):
+        epoch = epoch + 1
+        for minibatch_index in xrange(n_train_batches):
+
+            iter = (epoch - 1) * n_train_batches + minibatch_index
+
+            if iter % 100 == 0:
+                print 'training @ iter = ', iter
+            cost_ij = train_model(minibatch_index)
 
+            if (iter + 1) % validation_frequency == 0:
+
+                # compute zero-one loss on validation set
+                validation_losses = [validate_model(i) for i
+                                     in xrange(n_valid_batches)]
+                this_validation_loss = np.mean(validation_losses)
+                print('epoch %i, minibatch %i/%i, validation error %f %%' %
+                      (epoch, minibatch_index + 1, n_train_batches,
+                       this_validation_loss * 100.))
+
+                # if we got the best validation score until now
+                if this_validation_loss < best_validation_loss:
+
+                    #improve patience if loss improvement is good enough
+                    if this_validation_loss < best_validation_loss * \
+                       improvement_threshold:
+                        patience = max(patience, iter * patience_increase)
+
+                    # save best validation score and iteration number
+                    best_validation_loss = this_validation_loss
+                    best_iter = iter
+
+                    # test it on the test set
+                    test_losses = [
+                        test_model(i)
+                        for i in xrange(n_test_batches)
+                    ]
+                    test_score = np.mean(test_losses)
+                    print(('     epoch %i, minibatch %i/%i, test error of '
+                           'best model %f %%') %
+                          (epoch, minibatch_index + 1, n_train_batches,
+                           test_score * 100.))
+
+            if patience <= iter:
+                done_looping = True
+                break
+
+    end_time = time.clock()
+    print('Optimization complete.')
+    print('Best validation score of %f %% obtained at iteration %i, '
+          'with test performance %f %%' %
+          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
+    print >> sys.stderr, ('The code for file ' +
+                          os.path.split(__file__)[1] +
+                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
 
 
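The extracted example_cnn_mnist keeps a stray self as its first positional parameter, a leftover from its method origin that the function body never uses, so a direct call has to fill it. A hypothetical invocation:

    # Hypothetical usage; self is unused inside the function, so None suffices.
    example_cnn_mnist(None,
                      dataset=os.path.join('', '../../res/', 'mnist.pkl.gz'),
                      learning_rate=0.1, n_epochs=200,
                      nkerns=[20, 50], batch_size=500)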