Commit 4eac668051893fcef90749f43f9deebadda8c57a
1 parent defa5614
Exists in master and in 1 other branch

3-conv-layer to 2-conv-layer model.

Showing 2 changed files with 248 additions and 44 deletions
mmodel/theano/THEANO.py
@@ -39,18 +39,17 @@ class ModelTHEANO(ModelBase):
 
     def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'mnist.pkl.gz'),
                    learning_rate=0.1, n_epochs=200,
-                   nkerns=[20, 50, 50],
+                   nkerns=[20, 50],
                    batch_size=400):
 
-        # return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
-        #                          batch_size=batch_size)
+        # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+        #                            batch_size=batch_size)
 
-        with gzip.open(dataset, 'rb') as f:
-            train_set, valid_set, test_set = cPickle.load(f)
+        X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
 
-        train_set_x, train_set_y = shared_dataset(train_set)
-        valid_set_x, valid_set_y = shared_dataset(valid_set)
-        test_set_x, test_set_y = shared_dataset(test_set)
+        train_set_x, train_set_y = shared_dataset((X_train, Y_train))
+        valid_set_x, valid_set_y = train_set_x[:1000], train_set_y[:1000]
+        test_set_x, test_set_y = shared_dataset((X_test, Y_test))
 
         # compute number of minibatches for training, validation and testing
         n_train_batches = train_set_x.get_value(borrow=True).shape[0]
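The pickled-MNIST loader gives way to an in-memory 80/20 split of the caller's arrays, with the validation set taken as the first 1000 training rows rather than a true hold-out. A minimal sketch of the same pattern, assuming NumPy arrays X, Y and the old sklearn cross_validation module (renamed model_selection in later sklearn releases); shared_dataset is the helper from mmodel/theano/theanoutil.py, reproduced here in tutorial form so the sketch is self-contained:

```python
import numpy as np
import theano
import theano.tensor as T
from sklearn import cross_validation  # sklearn.model_selection in later releases

def shared_dataset(data_xy, borrow=True):
    # wrap arrays as Theano shared variables, labels cast to int32,
    # as in mmodel/theano/theanoutil.py
    data_x, data_y = data_xy
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')

# hypothetical arrays standing in for the caller's X, Y
X = np.random.rand(200, 304 * 304)
Y = np.random.randint(0, 2, size=200)

X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
    X, Y, test_size=0.2, random_state=0)
train_set_x, train_set_y = shared_dataset((X_train, Y_train))
test_set_x, test_set_y = shared_dataset((X_test, Y_test))
# the validation set is the first 1000 training rows, not a separate hold-out
valid_set_x, valid_set_y = train_set_x[:1000], train_set_y[:1000]
```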
@@ -60,44 +59,44 @@ class ModelTHEANO(ModelBase):
         n_valid_batches /= batch_size
         n_test_batches /= batch_size
 
-        print train_set_x.get_value(borrow=True).shape, train_set_y.get_value(borrow=True).shape
+        print train_set_x.get_value(borrow=True).shape, train_set_y.shape
 
         rng = np.random.RandomState(12306)
         index = T.lscalar()  # index to a [mini]batch
         # start-snippet-1
-        x = T.matrix('x')   # the data is presented as rasterized images
+        x = T.matrix('x')    # the data is presented as rasterized images
         y = T.ivector('y')  # the labels are presented as 1D vector of
-                            # [int] labels
+                             # [int] labels
 
         ######################
         # BUILD ACTUAL MODEL #
         ######################
         print '... building the model'
 
-        layer0_input = x.reshape((batch_size, 1, 28, 28))
+        layer0_input = x.reshape((batch_size, 1, 304, 304))
 
         # Construct the first convolutional pooling layer:
-        # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
-        # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
-        # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
+        # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297)
+        # maxpooling reduces this further to (297/2, 297/2) = (148, 148)
+        # 4D output tensor is thus of shape (batch_size, nkerns[0], 148, 148)
         layer0 = ConvPoolLayer(
             rng,
             input=layer0_input,
-            image_shape=(batch_size, 1, 28, 28),
-            filter_shape=(nkerns[0], 1, 5, 5),
+            image_shape=(batch_size, 1, 304, 304),
+            filter_shape=(nkerns[0], 1, 8, 8),
             poolsize=(2, 2)
         )
 
         # Construct the second convolutional pooling layer
-        # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
-        # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
-        # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
+        # filtering reduces the image size to (148-5+1, 148-5+1) = (144, 144)
+        # maxpooling reduces this further to (144/4, 144/4) = (38, 38)
+        # 4D output tensor is thus of shape (batch_size, nkerns[1], 38, 38)
         layer1 = ConvPoolLayer(
             rng,
             input=layer0.output,
-            image_shape=(batch_size, nkerns[0], 12, 12),
+            image_shape=(batch_size, nkerns[0], 148, 148),
             filter_shape=(nkerns[1], nkerns[0], 5, 5),
-            poolsize=(2, 2)
+            poolsize=(4, 4)
         )
 
         # the HiddenLayer being fully-connected, it operates on 2D matrices of
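The new size comments follow the usual valid-convolution / max-pooling arithmetic, which can be rechecked mechanically. A sketch of that check, assuming a tutorial-style ConvPoolLayer (valid convolution, then max_pool_2d with ignore_border=True, i.e. floor division). Under these assumptions layer0 comes out at 148 x 148 as the comment says, while layer1 comes out at 36 x 36 rather than the 38 x 38 used in the comments and the later n_in; the exact figure depends on how this repo's ConvPoolLayer pads and pools:

```python
def conv_pool_out(size, filt, pool):
    # 'valid' convolution shrinks by filt - 1; pooling floor-divides
    return (size - filt + 1) // pool

s0 = conv_pool_out(304, 8, 2)  # (304 - 8 + 1) // 2 = 148
s1 = conv_pool_out(s0, 5, 4)   # (148 - 5 + 1) // 4 = 36
print s0, s1
```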
@@ -110,13 +109,13 @@ class ModelTHEANO(ModelBase):
         layer2 = HiddenLayer(
             rng,
             input=layer2_input,
-            n_in=nkerns[1] * 4 * 4,
+            n_in=nkerns[1] * 38 * 38,
             n_out=500,
             activation=T.tanh
         )
 
         # classify the values of the fully-connected sigmoidal layer
-        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=2)
 
         # the cost we minimize during training is the NLL of the model
         cost = layer3.negative_log_likelihood(y)
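Only the head changes here: a 10-way MNIST softmax becomes a 2-way classifier. The cost is the mean negative log-likelihood of the true class; a sketch of how the deeplearning.net-style LogisticRegression this code mirrors computes it, with p_y_given_x standing for the softmax output matrix:

```python
import theano.tensor as T

def negative_log_likelihood(p_y_given_x, y):
    # pick out log P(y_i | x_i) for each row i, average over the minibatch
    return -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
```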
@@ -155,7 +154,11 @@ class ModelTHEANO(ModelBase):
             (param_i, param_i - learning_rate * grad_i)
             for param_i, grad_i in zip(params, grads)
         ]
-
+        """
+        Total Parameters:
+        >>> 20 * 64 + 1000 * 25 + 50 * 38 * 38 * 500 + 500 * 2
+        36127280
+        """
        train_model = theano.function(
             [index],
             cost,
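The new docstring counts weights only (biases omitted). The same tally with the terms spelled out, matching the docstring's 36,127,280:

```python
layer0_w = 20 * (1 * 8 * 8)      # 20 conv filters of 1x8x8      =     1280
layer1_w = 50 * (20 * 5 * 5)     # 50 conv filters of 20x5x5     =    25000
layer2_w = 50 * 38 * 38 * 500    # flattened maps -> 500 hidden  = 36100000
layer3_w = 500 * 2               # 500 hidden -> 2-way softmax   =     1000
total = layer0_w + layer1_w + layer2_w + layer3_w
assert total == 36127280
print total  # the fully-connected layer holds ~99.9% of all weights
```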
@@ -174,14 +177,14 @@ class ModelTHEANO(ModelBase):
         # early-stopping parameters
         patience = 10000  # look as this many examples regardless
         patience_increase = 2  # wait this much longer when a new best is
-                               # found
+                                # found
         improvement_threshold = 0.995  # a relative improvement of this much is
-                                       # considered significant
+                                        # considered significant
         validation_frequency = min(n_train_batches, patience / 2)
-                                      # go through this many
-                                      # minibatche before checking the network
-                                      # on the validation set; in this case we
-                                      # check every epoch
+                                  # go through this many
+                                  # minibatche before checking the network
+                                  # on the validation set; in this case we
+                                  # check every epoch
 
         best_validation_loss = np.inf
         best_iter = 0
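This hunk only re-indents the comments, but they describe the early-stopping scheme the loop below implements: patience is a floor on the number of iterations, and each sufficiently large validation improvement raises it. A condensed sketch of the control flow, with validate() as a hypothetical stub for the real validation pass:

```python
import numpy as np

patience = 10000               # look at this many minibatches regardless
patience_increase = 2          # extension factor on a significant new best
improvement_threshold = 0.995  # relative improvement that counts as significant
best_loss = np.inf

for it in xrange(1000000):
    loss = validate()  # hypothetical stub: returns current validation loss
    if loss < best_loss:
        if loss < best_loss * improvement_threshold:
            # a significant improvement buys more patience
            patience = max(patience, it * patience_increase)
        best_loss = loss
    if patience <= it:
        break  # patience exhausted: stop training
```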
@@ -197,8 +200,8 @@ class ModelTHEANO(ModelBase):
 
                 iter = (epoch - 1) * n_train_batches + minibatch_index
 
-                if iter % 100 == 0:
-                    print 'training @ iter = ', iter
+                # if iter % 100 == 0:
+                print 'training @ iter = ', iter
                 cost_ij = train_model(minibatch_index)
 
                 if (iter + 1) % validation_frequency == 0:
@@ -215,8 +218,8 @@ class ModelTHEANO(ModelBase):
                     if this_validation_loss < best_validation_loss:
 
                         #improve patience if loss improvement is good enough
-                        if this_validation_loss < best_validation_loss * \
-                           improvement_threshold:
+                        if this_validation_loss < best_validation_loss * \
+                                improvement_threshold:
                             patience = max(patience, iter * patience_increase)
 
                         # save best validation score and iteration number
@@ -248,14 +251,6 @@ class ModelTHEANO(ModelBase):
               ' ran for %.2fm' % ((end_time - start_time) / 60.))
 
 
-
-
-
-
-
-
-
-
     def train(self, X, Y):
         if self.toolset == 'cnn':
             return self._train_cnn(X, Y)
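With the blank-line cleanup, the public entry point is unchanged. A hypothetical call, assuming ModelTHEANO takes a toolset argument (its constructor is not shown in this diff) and that X, Y are the rasterized 304x304 images and binary labels the new code expects:

```python
from mmodel.theano.THEANO import ModelTHEANO

model = ModelTHEANO(toolset='cnn')  # constructor signature assumed, not in this diff
model.train(X, Y)                   # X: (n, 304*304) rasterized images, Y: 0/1 labels
```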
mmodel/theano/theanoutil.py
@@ -11,6 +11,7 @@ import theano.tensor as T
 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv
 
+import gzip
 import cPickle
 
 
@@ -193,7 +194,7 @@ def shared_dataset(data_xy, borrow=True):
     # lets ous get around this issue
     return shared_x, T.cast(shared_y, 'int32')
 
-def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
+def example_cnn_ilscrop(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
                        learning_rate=0.1, n_epochs=200,
                        nkerns=[20, 50, 50],
                        batch_size=400):
@@ -388,7 +389,215 @@ def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
 
 
 
+def example_cnn_mnist(self, X=None, Y=None, dataset=os.path.join('', '../../res/', 'mnist.pkl.gz'),
+                      learning_rate=0.1, n_epochs=200,
+                      nkerns=[20, 50],
+                      batch_size=500):
+
+    # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+    #                            batch_size=batch_size)
+
+    with gzip.open(dataset, 'rb') as f:
+        train_set, valid_set, test_set = cPickle.load(f)
+
+    train_set_x, train_set_y = shared_dataset(train_set)
+    valid_set_x, valid_set_y = shared_dataset(valid_set)
+    test_set_x, test_set_y = shared_dataset(test_set)
+
+    # compute number of minibatches for training, validation and testing
+    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
+    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
+    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
+    n_train_batches /= batch_size
+    n_valid_batches /= batch_size
+    n_test_batches /= batch_size
+
+    print train_set_x.get_value(borrow=True).shape, train_set_y.shape
+
+    rng = np.random.RandomState(12306)
+    index = T.lscalar()  # index to a [mini]batch
+    # start-snippet-1
+    x = T.matrix('x')   # the data is presented as rasterized images
+    y = T.ivector('y')  # the labels are presented as 1D vector of
+                        # [int] labels
+
+    ######################
+    # BUILD ACTUAL MODEL #
+    ######################
+    print '... building the model'
+
+    layer0_input = x.reshape((batch_size, 1, 28, 28))
+
+    # Construct the first convolutional pooling layer:
+    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
+    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
+    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
+    layer0 = ConvPoolLayer(
+        rng,
+        input=layer0_input,
+        image_shape=(batch_size, 1, 28, 28),
+        filter_shape=(nkerns[0], 1, 5, 5),
+        poolsize=(2, 2)
+    )
+
+    # Construct the second convolutional pooling layer
+    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
+    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
+    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
+    layer1 = ConvPoolLayer(
+        rng,
+        input=layer0.output,
+        image_shape=(batch_size, nkerns[0], 12, 12),
+        filter_shape=(nkerns[1], nkerns[0], 5, 5),
+        poolsize=(2, 2)
+    )
+
+    # the HiddenLayer being fully-connected, it operates on 2D matrices of
+    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
+    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
+    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
+    layer2_input = layer1.output.flatten(2)
+
+    # construct a fully-connected sigmoidal layer
+    layer2 = HiddenLayer(
+        rng,
+        input=layer2_input,
+        n_in=nkerns[1] * 4 * 4,
+        n_out=500,
+        activation=T.tanh
+    )
+
+    # classify the values of the fully-connected sigmoidal layer
+    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+
+    # the cost we minimize during training is the NLL of the model
+    cost = layer3.negative_log_likelihood(y)
+
+    # create a function to compute the mistakes that are made by the model
+    test_model = theano.function(
+        [index],
+        layer3.errors(y),
+        givens={
+            x: test_set_x[index * batch_size: (index + 1) * batch_size],
+            y: test_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    validate_model = theano.function(
+        [index],
+        layer3.errors(y),
+        givens={
+            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
+            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    # create a list of all model parameters to be fit by gradient descent
+    params = layer3.params + layer2.params + layer1.params + layer0.params
+
+    # create a list of gradients for all model parameters
+    grads = T.grad(cost, params)
+
+    # train_model is a function that updates the model parameters by
+    # SGD Since this model has many parameters, it would be tedious to
+    # manually create an update rule for each model parameter. We thus
+    # create the updates list by automatically looping over all
+    # (params[i], grads[i]) pairs.
+    updates = [
+        (param_i, param_i - learning_rate * grad_i)
+        for param_i, grad_i in zip(params, grads)
+    ]
+
+    train_model = theano.function(
+        [index],
+        cost,
+        updates=updates,
+        givens={
+            x: train_set_x[index * batch_size: (index + 1) * batch_size],
+            y: train_set_y[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+    # end-snippet-1
+
+    ###############
+    # TRAIN MODEL #
+    ###############
+    print '... training'
+    # early-stopping parameters
+    patience = 10000  # look as this many examples regardless
+    patience_increase = 2  # wait this much longer when a new best is
+                           # found
+    improvement_threshold = 0.995  # a relative improvement of this much is
+                                   # considered significant
+    validation_frequency = min(n_train_batches, patience / 2)
+                              # go through this many
+                              # minibatche before checking the network
+                              # on the validation set; in this case we
+                              # check every epoch
+
+    best_validation_loss = np.inf
+    best_iter = 0
+    test_score = 0.
+    start_time = time.clock()
+
+    epoch = 0
+    done_looping = False
+
+    while (epoch < n_epochs) and (not done_looping):
+        epoch = epoch + 1
+        for minibatch_index in xrange(n_train_batches):
+
+            iter = (epoch - 1) * n_train_batches + minibatch_index
+
+            if iter % 100 == 0:
+                print 'training @ iter = ', iter
+            cost_ij = train_model(minibatch_index)
 
+            if (iter + 1) % validation_frequency == 0:
+
+                # compute zero-one loss on validation set
+                validation_losses = [validate_model(i) for i
+                                     in xrange(n_valid_batches)]
+                this_validation_loss = np.mean(validation_losses)
+                print('epoch %i, minibatch %i/%i, validation error %f %%' %
+                      (epoch, minibatch_index + 1, n_train_batches,
+                       this_validation_loss * 100.))
+
+                # if we got the best validation score until now
+                if this_validation_loss < best_validation_loss:
+
+                    #improve patience if loss improvement is good enough
+                    if this_validation_loss < best_validation_loss * \
+                       improvement_threshold:
+                        patience = max(patience, iter * patience_increase)
+
+                    # save best validation score and iteration number
+                    best_validation_loss = this_validation_loss
+                    best_iter = iter
+
+                    # test it on the test set
+                    test_losses = [
+                        test_model(i)
+                        for i in xrange(n_test_batches)
+                    ]
+                    test_score = np.mean(test_losses)
+                    print(('     epoch %i, minibatch %i/%i, test error of '
+                           'best model %f %%') %
+                          (epoch, minibatch_index + 1, n_train_batches,
+                           test_score * 100.))
+
+            if patience <= iter:
+                done_looping = True
+                break
+
+    end_time = time.clock()
+    print('Optimization complete.')
+    print('Best validation score of %f %% obtained at iteration %i, '
+          'with test performance %f %%' %
+          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
+    print >> sys.stderr, ('The code for file ' +
+                          os.path.split(__file__)[1] +
+                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
 
 
 
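A hypothetical invocation of the relocated MNIST example. Note the signature keeps a stray self parameter (it reads like a method lifted to module level), so a placeholder first argument is needed, and mnist.pkl.gz must exist at the default relative path or be passed explicitly:

```python
from mmodel.theano.theanoutil import example_cnn_mnist

# first argument fills the unused 'self' slot; the path is an assumption
example_cnn_mnist(None, dataset='../../res/mnist.pkl.gz', n_epochs=5)
```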