Commit 4eac668051893fcef90749f43f9deebadda8c57a

Authored by Chunk
1 parent defa5614
Exists in master and in 1 other branch: refactor

3-conv-layer to 2-conv-layer model.
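
For reference, the new two-conv-layer configuration trades the third conv layer for a larger first filter (8x8 over 304x304 inputs) and a 4x4 pool after the second layer, feeding a 500-unit hidden layer and a 2-class logistic output. The weight-count arithmetic recorded in the docstring this commit adds works out as below; this is a sketch only, the spatial sizes are the ones stated in the diff's own comments, and biases are excluded.

# Weight counts for the new 2-conv-layer model, reproducing the docstring arithmetic.
conv1 = 20 * (1 * 8 * 8)       # nkerns[0] = 20 filters over 1 input channel, 8x8 each
conv2 = (50 * 20) * (5 * 5)    # nkerns[1] = 50 filters over 20 channels, 5x5 each
hidden = 50 * 38 * 38 * 500    # flattened 38x38 maps (per the in-code comment) into 500 units
logreg = 500 * 2               # 500 hidden units into 2 output classes
print conv1 + conv2 + hidden + logreg   # 36127280, matching the "Total Parameters" note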

mmodel/theano/THEANO.py
@@ -39,18 +39,17 @@ class ModelTHEANO(ModelBase):
 
     def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'mnist.pkl.gz'),
                    learning_rate=0.1, n_epochs=200,
-                   nkerns=[20, 50, 50],
+                   nkerns=[20, 50],
                    batch_size=400):
 
-        # return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
-        #                          batch_size=batch_size)
+        # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+        #                            batch_size=batch_size)
 
-        with gzip.open(dataset, 'rb') as f:
-            train_set, valid_set, test_set = cPickle.load(f)
+        X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
 
-        train_set_x, train_set_y = shared_dataset(train_set)
-        valid_set_x, valid_set_y = shared_dataset(valid_set)
-        test_set_x, test_set_y = shared_dataset(test_set)
+        train_set_x, train_set_y = shared_dataset((X_train, Y_train))
+        valid_set_x, valid_set_y = train_set_x[:1000], train_set_y[:1000]
+        test_set_x, test_set_y = shared_dataset((X_test, Y_test))
 
         # compute number of minibatches for training, validation and testing
         n_train_batches = train_set_x.get_value(borrow=True).shape[0]
@@ -60,44 +59,44 @@ class ModelTHEANO(ModelBase):
         n_valid_batches /= batch_size
         n_test_batches /= batch_size
 
-        print train_set_x.get_value(borrow=True).shape, train_set_y.get_value(borrow=True).shape
+        print train_set_x.get_value(borrow=True).shape, train_set_y.shape
 
         rng = np.random.RandomState(12306)
         index = T.lscalar()  # index to a [mini]batch
         # start-snippet-1
-        x = T.matrix('x')   # the data is presented as rasterized images
+        x = T.matrix('x')   # the data is presented as rasterized images
         y = T.ivector('y')  # the labels are presented as 1D vector of
-                            # [int] labels
+                            # [int] labels
 
         ######################
         # BUILD ACTUAL MODEL #
         ######################
         print '... building the model'
 
-        layer0_input = x.reshape((batch_size, 1, 28, 28))
+        layer0_input = x.reshape((batch_size, 1, 304, 304))
 
         # Construct the first convolutional pooling layer:
-        # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
-        # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
-        # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
+        # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297)
+        # maxpooling reduces this further to (297/2, 297/2) = (148, 148)
+        # 4D output tensor is thus of shape (batch_size, nkerns[0], 148, 148)
         layer0 = ConvPoolLayer(
             rng,
             input=layer0_input,
-            image_shape=(batch_size, 1, 28, 28),
-            filter_shape=(nkerns[0], 1, 5, 5),
+            image_shape=(batch_size, 1, 304, 304),
+            filter_shape=(nkerns[0], 1, 8, 8),
             poolsize=(2, 2)
         )
 
         # Construct the second convolutional pooling layer
-        # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
-        # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
-        # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
+        # filtering reduces the image size to (148-5+1, 148-5+1) = (144, 144)
+        # maxpooling reduces this further to (144/4, 144/4) = (38, 38)
+        # 4D output tensor is thus of shape (batch_size, nkerns[1], 38, 38)
         layer1 = ConvPoolLayer(
             rng,
             input=layer0.output,
-            image_shape=(batch_size, nkerns[0], 12, 12),
+            image_shape=(batch_size, nkerns[0], 148, 148),
             filter_shape=(nkerns[1], nkerns[0], 5, 5),
-            poolsize=(2, 2)
+            poolsize=(4, 4)
         )
 
         # the HiddenLayer being fully-connected, it operates on 2D matrices of
@@ -110,13 +109,13 @@ class ModelTHEANO(ModelBase):
         layer2 = HiddenLayer(
             rng,
             input=layer2_input,
-            n_in=nkerns[1] * 4 * 4,
+            n_in=nkerns[1] * 38 * 38,
             n_out=500,
             activation=T.tanh
         )
 
         # classify the values of the fully-connected sigmoidal layer
-        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=2)
 
         # the cost we minimize during training is the NLL of the model
         cost = layer3.negative_log_likelihood(y)
@@ -155,7 +154,11 @@ class ModelTHEANO(ModelBase):
             (param_i, param_i - learning_rate * grad_i)
             for param_i, grad_i in zip(params, grads)
         ]
-
+        """
+        Total Parameters:
+        >>> 20 * 64 + 1000 * 25 + 50 * 38 * 38 * 500 + 500 * 2
+        36127280
+        """
         train_model = theano.function(
             [index],
             cost,
@@ -174,14 +177,14 @@ class ModelTHEANO(ModelBase):
         # early-stopping parameters
         patience = 10000  # look as this many examples regardless
         patience_increase = 2  # wait this much longer when a new best is
-                               # found
+                               # found
         improvement_threshold = 0.995  # a relative improvement of this much is
-                                       # considered significant
+                                       # considered significant
         validation_frequency = min(n_train_batches, patience / 2)
-                                  # go through this many
-                                  # minibatche before checking the network
-                                  # on the validation set; in this case we
-                                  # check every epoch
+                                  # go through this many
+                                  # minibatche before checking the network
+                                  # on the validation set; in this case we
+                                  # check every epoch
 
         best_validation_loss = np.inf
         best_iter = 0
@@ -197,8 +200,8 @@ class ModelTHEANO(ModelBase):
 
                 iter = (epoch - 1) * n_train_batches + minibatch_index
 
-                if iter % 100 == 0:
-                    print 'training @ iter = ', iter
+                # if iter % 100 == 0:
+                print 'training @ iter = ', iter
                 cost_ij = train_model(minibatch_index)
 
                 if (iter + 1) % validation_frequency == 0:
@@ -215,8 +218,8 @@ class ModelTHEANO(ModelBase):
                     if this_validation_loss < best_validation_loss:
 
                         #improve patience if loss improvement is good enough
-                        if this_validation_loss < best_validation_loss * \
-                           improvement_threshold:
+                        if this_validation_loss < best_validation_loss * \
+                           improvement_threshold:
                             patience = max(patience, iter * patience_increase)
 
                         # save best validation score and iteration number
@@ -248,14 +251,6 @@ class ModelTHEANO(ModelBase):
                               ' ran for %.2fm' % ((end_time - start_time) / 60.))
 
 
-
-
-
-
-
-
-
-
     def train(self, X, Y):
         if self.toolset == 'cnn':
             return self._train_cnn(X, Y)
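
The data path in _train_cnn changes as well: instead of unpickling the gzipped MNIST splits, the method now splits the caller-supplied X and Y with scikit-learn and reuses a slice of the training set as the validation set. A minimal sketch of that path, assuming X is an (n_samples, 304*304) float array and Y an integer label vector; the stand-in data and the import path are hypothetical:

import numpy as np
from sklearn import cross_validation

from mmodel.theano.theanoutil import shared_dataset  # helper defined in this repo

X = np.random.rand(50, 304 * 304).astype('float32')  # hypothetical stand-in images
Y = np.random.randint(0, 2, size=50)                 # two classes, as in the new layer3

X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
    X, Y, test_size=0.2, random_state=0)

train_set_x, train_set_y = shared_dataset((X_train, Y_train))
# validation is no longer a separate split: it is a symbolic slice of the training data
valid_set_x, valid_set_y = train_set_x[:1000], train_set_y[:1000]
test_set_x, test_set_y = shared_dataset((X_test, Y_test))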
mmodel/theano/theanoutil.py
@@ -11,6 +11,7 @@ import theano.tensor as T
 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv
 
+import gzip
 import cPickle
 
 
@@ -193,7 +194,7 @@ def shared_dataset(data_xy, borrow=True):
     # lets ous get around this issue
     return shared_x, T.cast(shared_y, 'int32')
 
-def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
+def example_cnn_ilscrop(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
                       learning_rate=0.1, n_epochs=200,
                       nkerns=[20, 50, 50],
                       batch_size=400):
@@ -388,7 +389,215 @@ def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'il
 
 
 
+def example_cnn_mnist(self, X=None, Y=None, dataset=os.path.join('', '../../res/', 'mnist.pkl.gz'),
+                      learning_rate=0.1, n_epochs=200,
+                      nkerns=[20, 50],
+                      batch_size=500):
+
+    # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+    #                            batch_size=batch_size)
+
+    with gzip.open(dataset, 'rb') as f:
+        train_set, valid_set, test_set = cPickle.load(f)
+
+    train_set_x, train_set_y = shared_dataset(train_set)
+    valid_set_x, valid_set_y = shared_dataset(valid_set)
+    test_set_x, test_set_y = shared_dataset(test_set)
+
+    # compute number of minibatches for training, validation and testing
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
+    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
+    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
+    n_train_batches /= batch_size
+    n_valid_batches /= batch_size
+    n_test_batches /= batch_size
+
+    print train_set_x.get_value(borrow=True).shape, train_set_y.shape
+
+    rng = np.random.RandomState(12306)
+    index = T.lscalar()  # index to a [mini]batch
+    # start-snippet-1
+    x = T.matrix('x')   # the data is presented as rasterized images
+    y = T.ivector('y')  # the labels are presented as 1D vector of
+                        # [int] labels
+
+    ######################
+    # BUILD ACTUAL MODEL #
+    ######################
+    print '... building the model'
+
+    layer0_input = x.reshape((batch_size, 1, 28, 28))
+
+    # Construct the first convolutional pooling layer:
+    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
+    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
+    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
+    layer0 = ConvPoolLayer(
+        rng,
+        input=layer0_input,
+        image_shape=(batch_size, 1, 28, 28),
+        filter_shape=(nkerns[0], 1, 5, 5),
+        poolsize=(2, 2)
+    )
+
+    # Construct the second convolutional pooling layer
+    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
+    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
+    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
+    layer1 = ConvPoolLayer(
+        rng,
+        input=layer0.output,
+        image_shape=(batch_size, nkerns[0], 12, 12),
+        filter_shape=(nkerns[1], nkerns[0], 5, 5),
+        poolsize=(2, 2)
+    )
+
+    # the HiddenLayer being fully-connected, it operates on 2D matrices of
+    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
+    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
+    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
+    layer2_input = layer1.output.flatten(2)
+
+    # construct a fully-connected sigmoidal layer
+    layer2 = HiddenLayer(
+        rng,
+        input=layer2_input,
+        n_in=nkerns[1] * 4 * 4,
+        n_out=500,
+        activation=T.tanh
+    )
+
+    # classify the values of the fully-connected sigmoidal layer
+    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+
+    # the cost we minimize during training is the NLL of the model
+    cost = layer3.negative_log_likelihood(y)
+
+    # create a function to compute the mistakes that are made by the model
+    test_model = theano.function(
+        [index],
+        layer3.errors(y),
+        givens={
+            x: test_set_x[index * batch_size: (index + 1) * batch_size],
+            y: test_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    validate_model = theano.function(
+        [index],
+        layer3.errors(y),
+        givens={
+            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
+            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    # create a list of all model parameters to be fit by gradient descent
+    params = layer3.params + layer2.params + layer1.params + layer0.params
+
+    # create a list of gradients for all model parameters
+    grads = T.grad(cost, params)
+
+    # train_model is a function that updates the model parameters by
+    # SGD Since this model has many parameters, it would be tedious to
+    # manually create an update rule for each model parameter. We thus
+    # create the updates list by automatically looping over all
+    # (params[i], grads[i]) pairs.
+    updates = [
+        (param_i, param_i - learning_rate * grad_i)
+        for param_i, grad_i in zip(params, grads)
+    ]
+
+    train_model = theano.function(
+        [index],
+        cost,
+        updates=updates,
+        givens={
+            x: train_set_x[index * batch_size: (index + 1) * batch_size],
+            y: train_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+    # end-snippet-1
+
+    ###############
+    # TRAIN MODEL #
+    ###############
+    print '... training'
+    # early-stopping parameters
+    patience = 10000  # look as this many examples regardless
+    patience_increase = 2  # wait this much longer when a new best is
+                           # found
+    improvement_threshold = 0.995  # a relative improvement of this much is
+                                   # considered significant
+    validation_frequency = min(n_train_batches, patience / 2)
+                                  # go through this many
+                                  # minibatche before checking the network
+                                  # on the validation set; in this case we
+                                  # check every epoch
+
+    best_validation_loss = np.inf
+    best_iter = 0
+    test_score = 0.
+    start_time = time.clock()
+
+    epoch = 0
+    done_looping = False
+
+    while (epoch < n_epochs) and (not done_looping):
+        epoch = epoch + 1
+        for minibatch_index in xrange(n_train_batches):
+
+            iter = (epoch - 1) * n_train_batches + minibatch_index
+
+            if iter % 100 == 0:
+                print 'training @ iter = ', iter
+            cost_ij = train_model(minibatch_index)
 
+            if (iter + 1) % validation_frequency == 0:
+
+                # compute zero-one loss on validation set
+                validation_losses = [validate_model(i) for i
+                                     in xrange(n_valid_batches)]
+                this_validation_loss = np.mean(validation_losses)
+                print('epoch %i, minibatch %i/%i, validation error %f %%' %
+                      (epoch, minibatch_index + 1, n_train_batches,
+                       this_validation_loss * 100.))
+
+                # if we got the best validation score until now
+                if this_validation_loss < best_validation_loss:
+
+                    #improve patience if loss improvement is good enough
+                    if this_validation_loss < best_validation_loss * \
+                       improvement_threshold:
+                        patience = max(patience, iter * patience_increase)
+
+                    # save best validation score and iteration number
+                    best_validation_loss = this_validation_loss
+                    best_iter = iter
+
+                    # test it on the test set
+                    test_losses = [
+                        test_model(i)
+                        for i in xrange(n_test_batches)
+                    ]
+                    test_score = np.mean(test_losses)
+                    print((' epoch %i, minibatch %i/%i, test error of '
+                           'best model %f %%') %
+                          (epoch, minibatch_index + 1, n_train_batches,
+                           test_score * 100.))
+
+            if patience <= iter:
+                done_looping = True
+                break
+
+    end_time = time.clock()
+    print('Optimization complete.')
+    print('Best validation score of %f %% obtained at iteration %i, '
+          'with test performance %f %%' %
+          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
+    print >> sys.stderr, ('The code for file ' +
+                          os.path.split(__file__)[1] +
+                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
 
 
 
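
One detail worth noting in the new theanoutil.py function: example_cnn_mnist is module-level but keeps a self parameter, apparently carried over from the _train_cnn method it mirrors, and the body never uses it, so a caller has to pass a placeholder first argument. A hypothetical invocation, with the import path assumed from the file layout above:

from mmodel.theano.theanoutil import example_cnn_mnist

# `self` is unused inside the function, so any placeholder works; the remaining
# keyword arguments fall back to their defaults (nkerns=[20, 50], batch_size=500).
example_cnn_mnist(None, dataset='../../res/mnist.pkl.gz')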