Commit 4eac668051893fcef90749f43f9deebadda8c57a
Parent: defa5614
Exists in master and in 1 other branch.

3-conv-layer to 2-conv-layer model.

Showing 2 changed files with 248 additions and 44 deletions.
mmodel/theano/THEANO.py
@@ -39,18 +39,17 @@ class ModelTHEANO(ModelBase):
 
     def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'mnist.pkl.gz'),
                    learning_rate=0.1, n_epochs=200,
-                   nkerns=[20, 50, 50],
+                   nkerns=[20, 50],
                    batch_size=400):
 
-        # return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
-        #                          batch_size=batch_size)
+        # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+        #                            batch_size=batch_size)
 
-        with gzip.open(dataset, 'rb') as f:
-            train_set, valid_set, test_set = cPickle.load(f)
+        X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
 
-        train_set_x, train_set_y = shared_dataset(train_set)
-        valid_set_x, valid_set_y = shared_dataset(valid_set)
-        test_set_x, test_set_y = shared_dataset(test_set)
+        train_set_x, train_set_y = shared_dataset((X_train, Y_train))
+        valid_set_x, valid_set_y = train_set_x[:1000], train_set_y[:1000]
+        test_set_x, test_set_y = shared_dataset((X_test, Y_test))
 
         # compute number of minibatches for training, validation and testing
         n_train_batches = train_set_x.get_value(borrow=True).shape[0]
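Note on the new data path: the call to cross_validation.train_test_split assumes `from sklearn import cross_validation` is in scope in THEANO.py (scikit-learn renamed this module to model_selection in 0.18). A minimal sketch of what the replacement lines set up, with one Theano subtlety worth flagging:

    # Sketch only; names follow the diff, the sklearn import is an assumption.
    from sklearn import cross_validation  # scikit-learn < 0.18

    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
        X, Y, test_size=0.2, random_state=0)

    train_set_x, train_set_y = shared_dataset((X_train, Y_train))
    # Slicing a shared variable yields a *symbolic* subtensor: it is valid
    # inside theano.function givens, but it has no get_value() method.
    valid_set_x, valid_set_y = train_set_x[:1000], train_set_y[:1000]

If the surrounding (unchanged) code still sizes the validation batches via valid_set_x.get_value(borrow=True), as the MNIST variant later in this commit does, that call will fail on such a symbolic slice.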
@@ -60,44 +59,44 @@ class ModelTHEANO(ModelBase):
         n_valid_batches /= batch_size
         n_test_batches /= batch_size
 
-        print train_set_x.get_value(borrow=True).shape, train_set_y.get_value(borrow=True).shape
+        print train_set_x.get_value(borrow=True).shape, train_set_y.shape
 
         rng = np.random.RandomState(12306)
         index = T.lscalar()  # index to a [mini]batch
         # start-snippet-1
-        x = T.matrix('x')   # the data is presented as rasterized images
+        x = T.matrix('x')    # the data is presented as rasterized images
         y = T.ivector('y')  # the labels are presented as 1D vector of
-                            # [int] labels
+                             # [int] labels
 
         ######################
         # BUILD ACTUAL MODEL #
         ######################
         print '... building the model'
 
-        layer0_input = x.reshape((batch_size, 1, 28, 28))
+        layer0_input = x.reshape((batch_size, 1, 304, 304))
 
         # Construct the first convolutional pooling layer:
-        # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
-        # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
-        # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
+        # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297)
+        # maxpooling reduces this further to (297/2, 297/2) = (148, 148)
+        # 4D output tensor is thus of shape (batch_size, nkerns[0], 148, 148)
         layer0 = ConvPoolLayer(
             rng,
             input=layer0_input,
-            image_shape=(batch_size, 1, 28, 28),
-            filter_shape=(nkerns[0], 1, 5, 5),
+            image_shape=(batch_size, 1, 304, 304),
+            filter_shape=(nkerns[0], 1, 8, 8),
             poolsize=(2, 2)
         )
 
         # Construct the second convolutional pooling layer
-        # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
-        # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
-        # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
+        # filtering reduces the image size to (148-5+1, 148-5+1) = (144, 144)
+        # maxpooling reduces this further to (144/4, 144/4) = (38, 38)
+        # 4D output tensor is thus of shape (batch_size, nkerns[1], 38, 38)
         layer1 = ConvPoolLayer(
             rng,
            input=layer0.output,
-            image_shape=(batch_size, nkerns[0], 12, 12),
+            image_shape=(batch_size, nkerns[0], 148, 148),
             filter_shape=(nkerns[1], nkerns[0], 5, 5),
-            poolsize=(2, 2)
+            poolsize=(4, 4)
         )
 
         # the HiddenLayer being fully-connected, it operates on 2D matrices of
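The new size comments can be sanity-checked with integer arithmetic; this sketch assumes ConvPoolLayer performs a 'valid' convolution followed by non-overlapping max pooling with ignore_border=True, as in the Theano tutorial code it mirrors. The first stage matches the commented 148, but the second comes out to 36 rather than 38:

    def conv_pool_out(size, filt, pool):
        # 'valid' convolution, then non-overlapping pooling (ignore_border=True)
        return (size - filt + 1) // pool

    s0 = conv_pool_out(304, 8, 2)  # (304 - 8 + 1) // 2 = 148, as commented
    s1 = conv_pool_out(s0, 5, 4)   # (148 - 5 + 1) // 4 = 36, not 38
    print(s1)                      # 36

If 36 is the true pooled size, the hidden layer's n_in in the next hunk would need to be nkerns[1] * 36 * 36 for the shapes to line up.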
@@ -110,13 +109,13 @@ class ModelTHEANO(ModelBase):
         layer2 = HiddenLayer(
             rng,
             input=layer2_input,
-            n_in=nkerns[1] * 4 * 4,
+            n_in=nkerns[1] * 38 * 38,
             n_out=500,
             activation=T.tanh
         )
 
         # classify the values of the fully-connected sigmoidal layer
-        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=2)
 
         # the cost we minimize during training is the NLL of the model
         cost = layer3.negative_log_likelihood(y)
@@ -155,7 +154,11 @@ class ModelTHEANO(ModelBase):
             (param_i, param_i - learning_rate * grad_i)
             for param_i, grad_i in zip(params, grads)
         ]
-
+        """
+        Total Parameters:
+        >>> 20 * 64 + 1000 * 25 + 50 * 38 * 38 * 500 + 500 * 2
+        36127280
+        """
         train_model = theano.function(
             [index],
             cost,
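The triple-quoted string added above is a bare expression, so it acts as a comment; its arithmetic agrees with the shapes used in the layer comments (weights only, biases omitted):

    layer0_w = 20 * 1 * 8 * 8      # 1,280       conv1 filters (nkerns[0], 1, 8, 8)
    layer1_w = 50 * 20 * 5 * 5     # 25,000      conv2 filters (nkerns[1], nkerns[0], 5, 5)
    layer2_w = 50 * 38 * 38 * 500  # 36,100,000  hidden layer on flattened 38x38 maps
    layer3_w = 500 * 2             # 1,000       logistic regression weights
    print(layer0_w + layer1_w + layer2_w + layer3_w)  # 36127280

Nearly all of the ~36M parameters sit in the fully-connected layer; that is the usual cost of flattening large feature maps.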
@@ -174,14 +177,14 @@ class ModelTHEANO(ModelBase):
         # early-stopping parameters
         patience = 10000  # look as this many examples regardless
         patience_increase = 2  # wait this much longer when a new best is
-                           # found
+                               # found
         improvement_threshold = 0.995  # a relative improvement of this much is
-                                   # considered significant
+                                       # considered significant
         validation_frequency = min(n_train_batches, patience / 2)
-                              # go through this many
-                              # minibatche before checking the network
-                              # on the validation set; in this case we
-                              # check every epoch
+                                  # go through this many
+                                  # minibatche before checking the network
+                                  # on the validation set; in this case we
+                                  # check every epoch
 
         best_validation_loss = np.inf
         best_iter = 0
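For reference, the patience scheme these realigned comments describe, reduced to a standalone sketch (loss_stream is a hypothetical iterable of per-validation losses):

    patience = 10000               # look at this many iterations regardless
    patience_increase = 2          # extend patience on a significant new best
    improvement_threshold = 0.995  # significant = at least 0.5% relative gain

    best_loss = float('inf')
    for it, loss in enumerate(loss_stream):  # loss_stream is hypothetical
        if loss < best_loss:
            if loss < best_loss * improvement_threshold:
                patience = max(patience, it * patience_increase)
            best_loss = loss
        if patience <= it:
            break  # out of patience: stop training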
@@ -197,8 +200,8 @@ class ModelTHEANO(ModelBase):
 
                 iter = (epoch - 1) * n_train_batches + minibatch_index
 
-                if iter % 100 == 0:
-                    print 'training @ iter = ', iter
+                # if iter % 100 == 0:
+                print 'training @ iter = ', iter
                 cost_ij = train_model(minibatch_index)
 
                 if (iter + 1) % validation_frequency == 0:
@@ -215,8 +218,8 @@ class ModelTHEANO(ModelBase):
                     if this_validation_loss < best_validation_loss:
 
                         #improve patience if loss improvement is good enough
-                        if this_validation_loss < best_validation_loss * \
-                        improvement_threshold:
+                        if this_validation_loss < best_validation_loss * \
+                           improvement_threshold:
                             patience = max(patience, iter * patience_increase)
 
                         # save best validation score and iteration number
@@ -248,14 +251,6 @@ class ModelTHEANO(ModelBase):
               ' ran for %.2fm' % ((end_time - start_time) / 60.))
 
 
-
-
-
-
-
-
-
-
     def train(self, X, Y):
         if self.toolset == 'cnn':
             return self._train_cnn(X, Y)
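For context, the public entry point above is unchanged; a hypothetical call looks like the following (the constructor's toolset argument is inferred from self.toolset and is not shown in this diff):

    model = ModelTHEANO(toolset='cnn')  # hypothetical constructor call
    model.train(X, Y)                   # dispatches to _train_cnn(X, Y)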
mmodel/theano/theanoutil.py
@@ -11,6 +11,7 @@ import theano.tensor as T
 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv
 
+import gzip
 import cPickle
 
 
@@ -193,7 +194,7 @@ def shared_dataset(data_xy, borrow=True):
     # lets ous get around this issue
     return shared_x, T.cast(shared_y, 'int32')
 
-def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
+def example_cnn_ilscrop(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
                         learning_rate=0.1, n_epochs=200,
                         nkerns=[20, 50, 50],
                         batch_size=400):
@@ -388,7 +389,215 @@ def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'il
 
 
 
+def example_cnn_mnist(self, X=None, Y=None, dataset=os.path.join('', '../../res/', 'mnist.pkl.gz'),
+                      learning_rate=0.1, n_epochs=200,
+                      nkerns=[20, 50],
+                      batch_size=500):
+
+    # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+    #                            batch_size=batch_size)
+
+    with gzip.open(dataset, 'rb') as f:
+        train_set, valid_set, test_set = cPickle.load(f)
+
+    train_set_x, train_set_y = shared_dataset(train_set)
+    valid_set_x, valid_set_y = shared_dataset(valid_set)
+    test_set_x, test_set_y = shared_dataset(test_set)
+
+    # compute number of minibatches for training, validation and testing
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
+    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
+    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
+    n_train_batches /= batch_size
+    n_valid_batches /= batch_size
+    n_test_batches /= batch_size
+
+    print train_set_x.get_value(borrow=True).shape, train_set_y.shape
+
+    rng = np.random.RandomState(12306)
+    index = T.lscalar()  # index to a [mini]batch
+    # start-snippet-1
+    x = T.matrix('x')   # the data is presented as rasterized images
+    y = T.ivector('y')  # the labels are presented as 1D vector of
+                        # [int] labels
+
+    ######################
+    # BUILD ACTUAL MODEL #
+    ######################
+    print '... building the model'
+
+    layer0_input = x.reshape((batch_size, 1, 28, 28))
+
+    # Construct the first convolutional pooling layer:
+    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
+    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
+    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
+    layer0 = ConvPoolLayer(
+        rng,
+        input=layer0_input,
+        image_shape=(batch_size, 1, 28, 28),
+        filter_shape=(nkerns[0], 1, 5, 5),
+        poolsize=(2, 2)
+    )
+
+    # Construct the second convolutional pooling layer
+    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
+    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
+    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
+    layer1 = ConvPoolLayer(
+        rng,
+        input=layer0.output,
+        image_shape=(batch_size, nkerns[0], 12, 12),
+        filter_shape=(nkerns[1], nkerns[0], 5, 5),
+        poolsize=(2, 2)
+    )
+
+    # the HiddenLayer being fully-connected, it operates on 2D matrices of
+    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
+    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
+    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
+    layer2_input = layer1.output.flatten(2)
+
+    # construct a fully-connected sigmoidal layer
+    layer2 = HiddenLayer(
+        rng,
+        input=layer2_input,
+        n_in=nkerns[1] * 4 * 4,
+        n_out=500,
+        activation=T.tanh
+    )
+
+    # classify the values of the fully-connected sigmoidal layer
+    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+
+    # the cost we minimize during training is the NLL of the model
+    cost = layer3.negative_log_likelihood(y)
+
+    # create a function to compute the mistakes that are made by the model
+    test_model = theano.function(
+        [index],
+        layer3.errors(y),
+        givens={
+            x: test_set_x[index * batch_size: (index + 1) * batch_size],
+            y: test_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    validate_model = theano.function(
+        [index],
+        layer3.errors(y),
+        givens={
+            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
+            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    # create a list of all model parameters to be fit by gradient descent
+    params = layer3.params + layer2.params + layer1.params + layer0.params
+
+    # create a list of gradients for all model parameters
+    grads = T.grad(cost, params)
+
+    # train_model is a function that updates the model parameters by
+    # SGD Since this model has many parameters, it would be tedious to
+    # manually create an update rule for each model parameter. We thus
+    # create the updates list by automatically looping over all
+    # (params[i], grads[i]) pairs.
+    updates = [
+        (param_i, param_i - learning_rate * grad_i)
+        for param_i, grad_i in zip(params, grads)
+    ]
+
+    train_model = theano.function(
+        [index],
+        cost,
+        updates=updates,
+        givens={
+            x: train_set_x[index * batch_size: (index + 1) * batch_size],
+            y: train_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+    # end-snippet-1
+
+    ###############
+    # TRAIN MODEL #
+    ###############
+    print '... training'
+    # early-stopping parameters
+    patience = 10000  # look as this many examples regardless
+    patience_increase = 2  # wait this much longer when a new best is
+                           # found
+    improvement_threshold = 0.995  # a relative improvement of this much is
+                                   # considered significant
+    validation_frequency = min(n_train_batches, patience / 2)
+                              # go through this many
+                              # minibatche before checking the network
+                              # on the validation set; in this case we
+                              # check every epoch
+
+    best_validation_loss = np.inf
+    best_iter = 0
+    test_score = 0.
+    start_time = time.clock()
+
+    epoch = 0
+    done_looping = False
+
+    while (epoch < n_epochs) and (not done_looping):
+        epoch = epoch + 1
+        for minibatch_index in xrange(n_train_batches):
+
+            iter = (epoch - 1) * n_train_batches + minibatch_index
+
+            if iter % 100 == 0:
+                print 'training @ iter = ', iter
+            cost_ij = train_model(minibatch_index)
 
+            if (iter + 1) % validation_frequency == 0:
+
+                # compute zero-one loss on validation set
+                validation_losses = [validate_model(i) for i
+                                     in xrange(n_valid_batches)]
+                this_validation_loss = np.mean(validation_losses)
+                print('epoch %i, minibatch %i/%i, validation error %f %%' %
+                      (epoch, minibatch_index + 1, n_train_batches,
+                       this_validation_loss * 100.))
+
+                # if we got the best validation score until now
+                if this_validation_loss < best_validation_loss:
+
+                    #improve patience if loss improvement is good enough
+                    if this_validation_loss < best_validation_loss * \
+                       improvement_threshold:
+                        patience = max(patience, iter * patience_increase)
+
+                    # save best validation score and iteration number
+                    best_validation_loss = this_validation_loss
+                    best_iter = iter
+
+                    # test it on the test set
+                    test_losses = [
+                        test_model(i)
+                        for i in xrange(n_test_batches)
+                    ]
+                    test_score = np.mean(test_losses)
+                    print(('     epoch %i, minibatch %i/%i, test error of '
+                           'best model %f %%') %
+                          (epoch, minibatch_index + 1, n_train_batches,
+                           test_score * 100.))
+
+            if patience <= iter:
+                done_looping = True
+                break
+
+    end_time = time.clock()
+    print('Optimization complete.')
+    print('Best validation score of %f %% obtained at iteration %i, '
+          'with test performance %f %%' %
+          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
+    print >> sys.stderr, ('The code for file ' +
+                          os.path.split(__file__)[1] +
+                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
 
 
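The extracted example_cnn_mnist keeps a stray self as its first positional parameter, a leftover from its method origin that the function body never uses, so a direct call has to fill it. A hypothetical invocation:

    # Hypothetical usage; self is unused inside the function, so None suffices.
    example_cnn_mnist(None,
                      dataset=os.path.join('', '../../res/', 'mnist.pkl.gz'),
                      learning_rate=0.1, n_epochs=200,
                      nkerns=[20, 50], batch_size=500)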