__author__ = 'chunk'

import os
import sys
import time

import numpy as np
import theano
import theano.tensor as T
from sklearn import cross_validation

from ...mfeat import *
from ...mmodel import *
from ...mspark import SC
from ...common import *
from .theanoutil import *


class ModelTHEANO(ModelBase):
    def __init__(self, toolset='cnn', sc=None):
        ModelBase.__init__(self)
        self.toolset = toolset
        self.sparker = sc
        self.model = None

    def _train_cnn(self, X, Y, learning_rate=0.1, n_epochs=200, nkerns=[20, 50, 50], batch_size=200):
        X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)

        # The `givens` clauses below index the data with a symbolic scalar, so
        # the arrays must live in Theano shared variables; labels are cast to
        # int32 to match the T.ivector targets.
        X_train = theano.shared(np.asarray(X_train, dtype=theano.config.floatX), borrow=True)
        X_test = theano.shared(np.asarray(X_test, dtype=theano.config.floatX), borrow=True)
        Y_train = T.cast(theano.shared(np.asarray(Y_train, dtype=theano.config.floatX), borrow=True), 'int32')
        Y_test = T.cast(theano.shared(np.asarray(Y_test, dtype=theano.config.floatX), borrow=True), 'int32')

        n_train_batches = X_train.get_value(borrow=True).shape[0] // batch_size
        n_test_batches = X_test.get_value(borrow=True).shape[0] // batch_size

        rng = np.random.RandomState(12306)  # RandomState needs an integer seed; the value is arbitrary

        index = T.lscalar()  # index to a minibatch
        x = T.matrix('x')    # rasterized image data
        y = T.ivector('y')   # labels, presented as a 1D vector of int labels

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '... building the model'

        layer0_input = x.reshape((batch_size, 1, 304, 304))

        # Construct the first convolutional pooling layer:
        # filtering reduces the image size to (304-8+1, 304-8+1) = (297, 297)
        # maxpooling reduces this further to (297/4, 297/4) = (74, 74)
        # 4D output tensor is thus of shape (batch_size, nkerns[0], 74, 74)
        layer0 = ConvPoolLayer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 1, 304, 304),
            filter_shape=(nkerns[0], 1, 8, 8),
            poolsize=(4, 4)
        )

        # Construct the second convolutional pooling layer:
        # filtering reduces the image size to (74-8+1, 74-8+1) = (67, 67)
        # maxpooling reduces this further to (67/4, 67/4) = (16, 16)
        # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 16)
        layer1 = ConvPoolLayer(
            rng,
            input=layer0.output,
            image_shape=(batch_size, nkerns[0], 74, 74),
            filter_shape=(nkerns[1], nkerns[0], 8, 8),
            poolsize=(4, 4)
        )

        # Construct the third convolutional pooling layer:
        # filtering reduces the image size to (16-5+1, 16-5+1) = (12, 12)
        # maxpooling reduces this further to (12/3, 12/3) = (4, 4)
        # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4)
        layer2 = ConvPoolLayer(
            rng,
            input=layer1.output,
            image_shape=(batch_size, nkerns[1], 16, 16),  # layer1 outputs nkerns[1] feature maps, not nkerns[0]
            filter_shape=(nkerns[2], nkerns[1], 5, 5),
            poolsize=(3, 3)
        )

        # The HiddenLayer being fully-connected, it operates on 2D matrices of
        # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
        # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4),
        # or (200, 50 * 4 * 4) = (200, 800) with the default values.
        layer3_input = layer2.output.flatten(2)

        # construct a fully-connected layer with tanh activation
        layer3 = HiddenLayer(
            rng,
            input=layer3_input,
            n_in=nkerns[2] * 4 * 4,
            n_out=500,
            activation=T.tanh
        )

        # classify the values of the fully-connected layer
        layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=2)

        # the cost we minimize during training is the NLL of the model
        cost = layer4.negative_log_likelihood(y)

        params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
        grads = T.grad(cost, params)

        # plain SGD: update each parameter along the negative gradient
        updates = [
            (param_i, param_i - learning_rate * grad_i)
            for param_i, grad_i in zip(params, grads)
        ]

        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: X_train[index * batch_size: (index + 1) * batch_size],
                y: Y_train[index * batch_size: (index + 1) * batch_size]
            }
        )

        test_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x: X_test[index * batch_size: (index + 1) * batch_size],
                y: Y_test[index * batch_size: (index + 1) * batch_size]
            }
        )

        ###############
        # TRAIN MODEL #
        ###############
        print '... training'

        # early-stopping parameters
        patience = 10000  # look at this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        validation_frequency = min(n_train_batches, patience // 2)
        # go through this many minibatches before checking the network on the
        # validation set; in this case we check every epoch

        best_validation_loss = np.inf
        best_iter = 0
        test_score = 0.
        start_time = time.clock()

        epoch = 0
        done_looping = False

        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):
                iter = (epoch - 1) * n_train_batches + minibatch_index
                if iter % 100 == 0:
                    print 'training @ iter = ', iter
                cost_ij = train_model(minibatch_index)

                if (iter + 1) % validation_frequency == 0:
                    # compute zero-one loss on the held-out set (the 20% split
                    # doubles as validation and test set here)
                    validation_losses = [test_model(i) for i in xrange(n_test_batches)]
                    this_validation_loss = np.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iter * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter
                        test_score = this_validation_loss  # no separate test set: report the held-out error

                if patience <= iter:
                    done_looping = True
                    break

        end_time = time.clock()
        print('Optimization complete.')
        print('Best validation score of %f %% obtained at iteration %i, '
              'with test performance %f %%' %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
        print >> sys.stderr, ('The code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))
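    # The dispatchers below reference _predict_cnn and _test_cnn, which the
    # original module never defines. The two methods that follow are
    # hypothetical sketches, not the author's implementation: they assume a
    # `model` argument holding trained layer parameters (which _train_cnn does
    # not yet persist), so they validate their inputs and fail loudly rather
    # than silently returning garbage.
    def _predict_cnn(self, feat, model=None):
        if model is None:
            model = self.model
        if model is None:
            raise ValueError("no trained CNN parameters available")
        # a real implementation would rebuild the layer stack from the stored
        # weights, compile a theano.function returning layer4.y_pred, and
        # evaluate it on `feat`
        raise NotImplementedError("CNN prediction is not implemented yet")

    def _test_cnn(self, X, Y, model=None):
        if model is None:
            model = self.model
        if model is None:
            raise ValueError("no trained CNN parameters available")
        # a real implementation would compare self._predict_cnn(X, model)
        # against Y and return the zero-one error rate
        raise NotImplementedError("CNN testing is not implemented yet")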
    def train(self, X, Y):
        if self.toolset == 'cnn':
            return self._train_cnn(X, Y)
        else:
            raise Exception("Unknown toolset!")

    def predict(self, feat, model=None):
        if self.toolset == 'cnn':
            return self._predict_cnn(feat, model)
        else:
            raise Exception("Unknown toolset!")

    def test(self, X, Y, model=None):
        if self.toolset == 'cnn':
            return self._test_cnn(X, Y, model)
        else:
            raise Exception("Unknown toolset!")
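
# Hypothetical smoke test, not part of the original module: exercises the
# training pipeline end to end on random data. It assumes each sample is a
# rasterized 304x304 single-channel image (one row of 304*304 floats) with a
# 0/1 label; the shapes and the data itself are illustrative assumptions only.
if __name__ == '__main__':
    X = np.random.rand(1000, 304 * 304).astype(np.float32)
    Y = np.random.randint(0, 2, size=1000)
    ModelTHEANO(toolset='cnn').train(X, Y)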