Commit 4eac668051893fcef90749f43f9deebadda8c57a

Authored by Chunk
1 parent defa5614
Exists in master and in 1 other branch: refactor

Switch from a 3-conv-layer to a 2-conv-layer model.
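(As a reading aid, a minimal sketch of the shape arithmetic behind the new layer comments, assuming valid convolution and floor, i.e. ignore_border, pooling as in the tutorial-style ConvPoolLayer; conv_pool_out is an illustrative helper, not part of the commit.)

    def conv_pool_out(size, filt, pool):
        # valid convolution shrinks the side to (size - filt + 1);
        # non-overlapping max pooling then floor-divides by the pool width
        return (size - filt + 1) // pool

    s0 = conv_pool_out(304, 8, 2)  # layer0: (304 - 8 + 1) // 2 = 148
    s1 = conv_pool_out(s0, 5, 4)   # layer1: (148 - 5 + 1) // 4 = 36
    assert (s0, s1) == (148, 36)   # hidden layer sees nkerns[1] * 36 * 36 inputs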

mmodel/theano/THEANO.py
... ... @@ -39,18 +39,17 @@ class ModelTHEANO(ModelBase):
39 39  
40 40 def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'mnist.pkl.gz'),
41 41 learning_rate=0.1, n_epochs=200,
42   - nkerns=[20, 50, 50],
  42 + nkerns=[20, 50],
43 43 batch_size=400):
44 44  
45   - # return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
46   - # batch_size=batch_size)
  45 + # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
  46 + # batch_size=batch_size)
47 47  
48   - with gzip.open(dataset, 'rb') as f:
49   - train_set, valid_set, test_set = cPickle.load(f)
  48 + X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
50 49  
51   - train_set_x, train_set_y = shared_dataset(train_set)
52   - valid_set_x, valid_set_y = shared_dataset(valid_set)
53   - test_set_x, test_set_y = shared_dataset(test_set)
  50 + train_set_x, train_set_y = shared_dataset((X_train, Y_train))
  51 + valid_set_x, valid_set_y = train_set_x[:1000], train_set_y[:1000]
  52 + test_set_x, test_set_y = shared_dataset((X_test, Y_test))
54 53  
55 54 # compute number of minibatches for training, validation and testing
56 55 n_train_batches = train_set_x.get_value(borrow=True).shape[0]
... ... @@ -60,44 +59,44 @@ class ModelTHEANO(ModelBase):
60 59 n_valid_batches /= batch_size
61 60 n_test_batches /= batch_size
62 61  
63   - print train_set_x.get_value(borrow=True).shape, train_set_y.get_value(borrow=True).shape
  62 + print train_set_x.get_value(borrow=True).shape, train_set_y.shape
64 63  
65 64 rng = np.random.RandomState(12306)
66 65 index = T.lscalar() # index to a [mini]batch
67 66 # start-snippet-1
68   - x = T.matrix('x') # the data is presented as rasterized images
  67 + x = T.matrix('x') # the data is presented as rasterized images
69 68 y = T.ivector('y') # the labels are presented as 1D vector of
70   - # [int] labels
  69 + # [int] labels
71 70  
72 71 ######################
73 72 # BUILD ACTUAL MODEL #
74 73 ######################
75 74 print '... building the model'
76 75  
77   - layer0_input = x.reshape((batch_size, 1, 28, 28))
  76 + layer0_input = x.reshape((batch_size, 1, 304, 304))
78 77  
79 78 # Construct the first convolutional pooling layer:
80   - # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
81   - # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
82   - # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
  79 + # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297)
  80 + # maxpooling reduces this further to (297/2, 297/2) = (148, 148)
  81 + # 4D output tensor is thus of shape (batch_size, nkerns[0], 148, 148)
83 82 layer0 = ConvPoolLayer(
84 83 rng,
85 84 input=layer0_input,
86   - image_shape=(batch_size, 1, 28, 28),
87   - filter_shape=(nkerns[0], 1, 5, 5),
  85 + image_shape=(batch_size, 1, 304, 304),
  86 + filter_shape=(nkerns[0], 1, 8, 8),
88 87 poolsize=(2, 2)
89 88 )
90 89  
91 90 # Construct the second convolutional pooling layer
92   - # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
93   - # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
94   - # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
  91 + # filtering reduces the image size to (148-5+1, 148-5+1) = (144, 144)
  92 + # maxpooling reduces this further to (144/4, 144/4) = (36, 36)
  93 + # 4D output tensor is thus of shape (batch_size, nkerns[1], 36, 36)
95 94 layer1 = ConvPoolLayer(
96 95 rng,
97 96 input=layer0.output,
98   - image_shape=(batch_size, nkerns[0], 12, 12),
  97 + image_shape=(batch_size, nkerns[0], 148, 148),
99 98 filter_shape=(nkerns[1], nkerns[0], 5, 5),
100   - poolsize=(2, 2)
  99 + poolsize=(4, 4)
101 100 )
102 101  
103 102 # the HiddenLayer being fully-connected, it operates on 2D matrices of
... ... @@ -110,13 +109,13 @@ class ModelTHEANO(ModelBase):
110 109 layer2 = HiddenLayer(
111 110 rng,
112 111 input=layer2_input,
113   - n_in=nkerns[1] * 4 * 4,
  112 + n_in=nkerns[1] * 36 * 36,
114 113 n_out=500,
115 114 activation=T.tanh
116 115 )
117 116  
118 117 # classify the values of the fully-connected sigmoidal layer
119   - layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
  118 + layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=2)
120 119  
121 120 # the cost we minimize during training is the NLL of the model
122 121 cost = layer3.negative_log_likelihood(y)
... ... @@ -155,7 +154,11 @@ class ModelTHEANO(ModelBase):
155 154 (param_i, param_i - learning_rate * grad_i)
156 155 for param_i, grad_i in zip(params, grads)
157 156 ]
158   -
  157 + """
  158 + Total Parameters:
  159 + >>> 20 * 64 + 1000 * 25 + 50 * 38 * 38 * 500 + 500 * 2
  160 + 36127280
  161 + """
159 162 train_model = theano.function(
160 163 [index],
161 164 cost,
... ... @@ -174,14 +177,14 @@ class ModelTHEANO(ModelBase):
174 177 # early-stopping parameters
175 178 patience = 10000 # look at this many examples regardless
176 179 patience_increase = 2 # wait this much longer when a new best is
177   - # found
  180 + # found
178 181 improvement_threshold = 0.995 # a relative improvement of this much is
179   - # considered significant
  182 + # considered significant
180 183 validation_frequency = min(n_train_batches, patience / 2)
181   - # go through this many
182   - # minibatche before checking the network
183   - # on the validation set; in this case we
184   - # check every epoch
  184 + # go through this many
  185 + # minibatches before checking the network
  186 + # on the validation set; in this case we
  187 + # check every epoch
185 188  
186 189 best_validation_loss = np.inf
187 190 best_iter = 0
... ... @@ -197,8 +200,8 @@ class ModelTHEANO(ModelBase):
197 200  
198 201 iter = (epoch - 1) * n_train_batches + minibatch_index
199 202  
200   - if iter % 100 == 0:
201   - print 'training @ iter = ', iter
  203 + # if iter % 100 == 0:
  204 + print 'training @ iter = ', iter
202 205 cost_ij = train_model(minibatch_index)
203 206  
204 207 if (iter + 1) % validation_frequency == 0:
... ... @@ -215,8 +218,8 @@ class ModelTHEANO(ModelBase):
215 218 if this_validation_loss < best_validation_loss:
216 219  
217 220 #improve patience if loss improvement is good enough
218   - if this_validation_loss < best_validation_loss * \
219   - improvement_threshold:
  221 + if this_validation_loss < best_validation_loss * \
  222 + improvement_threshold:
220 223 patience = max(patience, iter * patience_increase)
221 224  
222 225 # save best validation score and iteration number
... ... @@ -248,14 +251,6 @@ class ModelTHEANO(ModelBase):
248 251 ' ran for %.2fm' % ((end_time - start_time) / 60.))
249 252  
250 253  
251   -
252   -
253   -
254   -
255   -
256   -
257   -
258   -
259 254 def train(self, X, Y):
260 255 if self.toolset == 'cnn':
261 256 return self._train_cnn(X, Y)
... ...
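(The "Total parameters" docstring added above tallies the weight matrices only, biases omitted. A quick sketch checking that arithmetic with the 36x36 feature maps; the variable names are illustrative.)

    nkerns = [20, 50]
    layer0_w = nkerns[0] * 1 * 8 * 8          # 1,280 first-conv filter weights
    layer1_w = nkerns[1] * nkerns[0] * 5 * 5  # 25,000 second-conv filter weights
    layer2_w = nkerns[1] * 36 * 36 * 500      # 32,400,000 hidden-layer weights
    layer3_w = 500 * 2                        # 1,000 output-layer weights
    assert layer0_w + layer1_w + layer2_w + layer3_w == 32427280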
mmodel/theano/theanoutil.py
... ... @@ -11,6 +11,7 @@ import theano.tensor as T
11 11 from theano.tensor.signal import downsample
12 12 from theano.tensor.nnet import conv
13 13  
  14 +import gzip
14 15 import cPickle
15 16  
16 17  
... ... @@ -193,7 +194,7 @@ def shared_dataset(data_xy, borrow=True):
193 194 # lets us get around this issue
194 195 return shared_x, T.cast(shared_y, 'int32')
195 196  
196   -def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
  197 +def example_cnn_ilscrop(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
197 198 learning_rate=0.1, n_epochs=200,
198 199 nkerns=[20, 50, 50],
199 200 batch_size=400):
... ... @@ -388,7 +389,215 @@ def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'il
388 389  
389 390  
390 391  
  392 +def example_cnn_mnist(X=None, Y=None, dataset=os.path.join('', '../../res/', 'mnist.pkl.gz'),
  393 + learning_rate=0.1, n_epochs=200,
  394 + nkerns=[20, 50],
  395 + batch_size=500):
  396 +
  397 + # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
  398 + # batch_size=batch_size)
  399 +
  400 + with gzip.open(dataset, 'rb') as f:
  401 + train_set, valid_set, test_set = cPickle.load(f)
  402 +
  403 + train_set_x, train_set_y = shared_dataset(train_set)
  404 + valid_set_x, valid_set_y = shared_dataset(valid_set)
  405 + test_set_x, test_set_y = shared_dataset(test_set)
  406 +
  407 + # compute number of minibatches for training, validation and testing
  408 + n_train_batches = train_set_x.get_value(borrow=True).shape[0]
  409 + n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
  410 + n_test_batches = test_set_x.get_value(borrow=True).shape[0]
  411 + n_train_batches /= batch_size
  412 + n_valid_batches /= batch_size
  413 + n_test_batches /= batch_size
  414 +
  415 + print train_set_x.get_value(borrow=True).shape, train_set_y.shape
  416 +
  417 + rng = np.random.RandomState(12306)
  418 + index = T.lscalar() # index to a [mini]batch
  419 + # start-snippet-1
  420 + x = T.matrix('x') # the data is presented as rasterized images
  421 + y = T.ivector('y') # the labels are presented as 1D vector of
  422 + # [int] labels
  423 +
  424 + ######################
  425 + # BUILD ACTUAL MODEL #
  426 + ######################
  427 + print '... building the model'
  428 +
  429 + layer0_input = x.reshape((batch_size, 1, 28, 28))
  430 +
  431 + # Construct the first convolutional pooling layer:
  432 + # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
  433 + # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
  434 + # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
  435 + layer0 = ConvPoolLayer(
  436 + rng,
  437 + input=layer0_input,
  438 + image_shape=(batch_size, 1, 28, 28),
  439 + filter_shape=(nkerns[0], 1, 5, 5),
  440 + poolsize=(2, 2)
  441 + )
  442 +
  443 + # Construct the second convolutional pooling layer
  444 + # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
  445 + # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
  446 + # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
  447 + layer1 = ConvPoolLayer(
  448 + rng,
  449 + input=layer0.output,
  450 + image_shape=(batch_size, nkerns[0], 12, 12),
  451 + filter_shape=(nkerns[1], nkerns[0], 5, 5),
  452 + poolsize=(2, 2)
  453 + )
  454 +
  455 + # the HiddenLayer being fully-connected, it operates on 2D matrices of
  456 + # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
  457 + # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
  458 + # or (500, 50 * 4 * 4) = (500, 800) with the default values.
  459 + layer2_input = layer1.output.flatten(2)
  460 +
  461 + # construct a fully-connected sigmoidal layer
  462 + layer2 = HiddenLayer(
  463 + rng,
  464 + input=layer2_input,
  465 + n_in=nkerns[1] * 4 * 4,
  466 + n_out=500,
  467 + activation=T.tanh
  468 + )
  469 +
  470 + # classify the values of the fully-connected sigmoidal layer
  471 + layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
  472 +
  473 + # the cost we minimize during training is the NLL of the model
  474 + cost = layer3.negative_log_likelihood(y)
  475 +
  476 + # create a function to compute the mistakes that are made by the model
  477 + test_model = theano.function(
  478 + [index],
  479 + layer3.errors(y),
  480 + givens={
  481 + x: test_set_x[index * batch_size: (index + 1) * batch_size],
  482 + y: test_set_y[index * batch_size: (index + 1) * batch_size]
  483 + }
  484 + )
  485 +
  486 + validate_model = theano.function(
  487 + [index],
  488 + layer3.errors(y),
  489 + givens={
  490 + x: valid_set_x[index * batch_size: (index + 1) * batch_size],
  491 + y: valid_set_y[index * batch_size: (index + 1) * batch_size]
  492 + }
  493 + )
  494 +
  495 + # create a list of all model parameters to be fit by gradient descent
  496 + params = layer3.params + layer2.params + layer1.params + layer0.params
  497 +
  498 + # create a list of gradients for all model parameters
  499 + grads = T.grad(cost, params)
  500 +
  501 + # train_model is a function that updates the model parameters by
  502 + # SGD Since this model has many parameters, it would be tedious to
  503 + # manually create an update rule for each model parameter. We thus
  504 + # create the updates list by automatically looping over all
  505 + # (params[i], grads[i]) pairs.
  506 + updates = [
  507 + (param_i, param_i - learning_rate * grad_i)
  508 + for param_i, grad_i in zip(params, grads)
  509 + ]
  510 +
  511 + train_model = theano.function(
  512 + [index],
  513 + cost,
  514 + updates=updates,
  515 + givens={
  516 + x: train_set_x[index * batch_size: (index + 1) * batch_size],
  517 + y: train_set_y[index * batch_size: (index + 1) * batch_size]
  518 + }
  519 + )
  520 + # end-snippet-1
  521 +
  522 + ###############
  523 + # TRAIN MODEL #
  524 + ###############
  525 + print '... training'
  526 + # early-stopping parameters
  527 + patience = 10000 # look at this many examples regardless
  528 + patience_increase = 2 # wait this much longer when a new best is
  529 + # found
  530 + improvement_threshold = 0.995 # a relative improvement of this much is
  531 + # considered significant
  532 + validation_frequency = min(n_train_batches, patience / 2)
  533 + # go through this many
  534 + # minibatches before checking the network
  535 + # on the validation set; in this case we
  536 + # check every epoch
  537 +
  538 + best_validation_loss = np.inf
  539 + best_iter = 0
  540 + test_score = 0.
  541 + start_time = time.clock()
  542 +
  543 + epoch = 0
  544 + done_looping = False
  545 +
  546 + while (epoch < n_epochs) and (not done_looping):
  547 + epoch = epoch + 1
  548 + for minibatch_index in xrange(n_train_batches):
  549 +
  550 + iter = (epoch - 1) * n_train_batches + minibatch_index
  551 +
  552 + if iter % 100 == 0:
  553 + print 'training @ iter = ', iter
  554 + cost_ij = train_model(minibatch_index)
391 555  
  556 + if (iter + 1) % validation_frequency == 0:
  557 +
  558 + # compute zero-one loss on validation set
  559 + validation_losses = [validate_model(i) for i
  560 + in xrange(n_valid_batches)]
  561 + this_validation_loss = np.mean(validation_losses)
  562 + print('epoch %i, minibatch %i/%i, validation error %f %%' %
  563 + (epoch, minibatch_index + 1, n_train_batches,
  564 + this_validation_loss * 100.))
  565 +
  566 + # if we got the best validation score until now
  567 + if this_validation_loss < best_validation_loss:
  568 +
  569 + #improve patience if loss improvement is good enough
  570 + if this_validation_loss < best_validation_loss * \
  571 + improvement_threshold:
  572 + patience = max(patience, iter * patience_increase)
  573 +
  574 + # save best validation score and iteration number
  575 + best_validation_loss = this_validation_loss
  576 + best_iter = iter
  577 +
  578 + # test it on the test set
  579 + test_losses = [
  580 + test_model(i)
  581 + for i in xrange(n_test_batches)
  582 + ]
  583 + test_score = np.mean(test_losses)
  584 + print((' epoch %i, minibatch %i/%i, test error of '
  585 + 'best model %f %%') %
  586 + (epoch, minibatch_index + 1, n_train_batches,
  587 + test_score * 100.))
  588 +
  589 + if patience <= iter:
  590 + done_looping = True
  591 + break
  592 +
  593 + end_time = time.clock()
  594 + print('Optimization complete.')
  595 + print('Best validation score of %f %% obtained at iteration %i, '
  596 + 'with test performance %f %%' %
  597 + (best_validation_loss * 100., best_iter + 1, test_score * 100.))
  598 + print >> sys.stderr, ('The code for file ' +
  599 + os.path.split(__file__)[1] +
  600 + ' ran for %.2fm' % ((end_time - start_time) / 60.))
392 601  
393 602  
394 603  
... ...
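(A hypothetical usage sketch of the two example entry points after this commit; the import path and argument values are assumptions, not part of the diff.)

    from mmodel.theano.theanoutil import example_cnn_ilscrop, example_cnn_mnist

    # MNIST baseline: loads res/mnist.pkl.gz relative to the working
    # directory, so X and Y can stay None.
    example_cnn_mnist(n_epochs=5, batch_size=500)

    # Crop-dataset variant: expects X (rasterized images) and Y (int labels).
    # example_cnn_ilscrop(X, Y, nkerns=[20, 50, 50], batch_size=400)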