__author__ = 'chunk'

from ...mfeat import *
from ...mmodel import *
from ...mspark import SC
from ...common import *
from .theanoutil import *

import os
import sys
import time
import gzip
import cPickle

import numpy as np
from sklearn import cross_validation

import theano
import theano.tensor as T
from theano import function, config, shared, sandbox

package_dir = os.path.dirname(os.path.abspath(__file__))


class ModelTHEANO(ModelBase):
    """
    Some notes:

    1. Error allocating 1411344000 bytes of device memory (out of memory).
       Driver report 203563008 bytes free and 3220897792 bytes total.

       This scenario arises when an operation requires allocation of a large
       contiguous block of memory but no blocks of sufficient size are
       available. GPUs do not have virtual memory, so all allocations must be
       assigned to a contiguous memory region. CPUs do not have this
       limitation because of their support for virtual memory. Multiple
       allocations on a GPU can result in memory fragmentation, which can
       make it more difficult to find contiguous regions of sufficient size
       during subsequent memory allocations.
    """

    def __init__(self, toolset='cnn', sc=None):
        ModelBase.__init__(self)
        self.toolset = toolset
        self.sparker = sc
        self.model = None

    def _train_cnn(self, X=None, Y=None,
                   dataset=os.path.join(package_dir, '../../res/', 'mnist.pkl.gz'),
                   learning_rate=0.4, n_epochs=200, nkerns=[20, 50], batch_size=300):
        # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate,
        #                            n_epochs=n_epochs, nkerns=nkerns, batch_size=batch_size)

        print "* dataset:", dataset
        print "* learn-rate:", learning_rate
        print "* n_epochs:", n_epochs
        print "* nkerns:", nkerns
        print "* batch_size:", batch_size

        if X is not None and Y is not None:
            print "* data_shape:", len(X), len(Y)

        X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)

        train_set_x, train_set_y = shared_dataset((X_train, Y_train))
        valid_set_x, valid_set_y = shared_dataset((X_train[:1200], Y_train[:1200]))
        test_set_x, test_set_y = shared_dataset((X_test, Y_test))

        # compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_train_batches /= batch_size
        n_valid_batches /= batch_size
        n_test_batches /= batch_size

        print train_set_x.get_value(borrow=True).shape, train_set_y.shape

        rng = np.random.RandomState(12306)

        index = T.lscalar()  # index to a [mini]batch

        # start-snippet-1
        x = T.matrix('x')    # the data is presented as rasterized images
        y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '... building the model'
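
        # Rough memory arithmetic for the shapes used below (an illustrative
        # sketch, not part of the original training path; assumes float32
        # activations and the default nkerns=[20, 50], batch_size=300).  The
        # layer-0 convolution alone produces a (batch_size, nkerns[0], 297, 297)
        # tensor before pooling:
        #     300 * 20 * 297 * 297 * 4 bytes ~= 2.1 GB
        # which is why the out-of-memory note in the class docstring is easy to
        # hit on a GPU with roughly 3.2 GB of total device memory.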
        layer0_input = x.reshape((batch_size, 1, 304, 304))

        # Construct the first convolutional pooling layer:
        # filtering reduces the image size to (304 - 8 + 1, 304 - 8 + 1) = (297, 297)
        # maxpooling reduces this further to (297 / 2, 297 / 2) = (148, 148)
        # 4D output tensor is thus of shape (batch_size, nkerns[0], 148, 148)
        layer0 = ConvPoolLayer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 1, 304, 304),
            filter_shape=(nkerns[0], 1, 8, 8),
            poolsize=(2, 2)
        )

        # Construct the second convolutional pooling layer:
        # filtering reduces the image size to (148 - 5 + 1, 148 - 5 + 1) = (144, 144)
        # maxpooling reduces this further to (144 / 4, 144 / 4) = (36, 36)
        # 4D output tensor is thus of shape (batch_size, nkerns[1], 36, 36)
        layer1 = ConvPoolLayer(
            rng,
            input=layer0.output,
            image_shape=(batch_size, nkerns[0], 148, 148),
            filter_shape=(nkerns[1], nkerns[0], 5, 5),
            poolsize=(4, 4)
        )

        # the HiddenLayer being fully-connected, it operates on 2D matrices of
        # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
        # This will generate a matrix of shape (batch_size, nkerns[1] * 36 * 36),
        # or (300, 50 * 36 * 36) = (300, 64800) with the default values.
        layer2_input = layer1.output.flatten(2)

        # construct a fully-connected sigmoidal layer
        layer2 = HiddenLayer(
            rng,
            input=layer2_input,
            n_in=nkerns[1] * 36 * 36,
            n_out=100,
            activation=T.tanh
        )

        # classify the values of the fully-connected sigmoidal layer
        layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=2)

        # the cost we minimize during training is the NLL of the model
        cost = layer3.negative_log_likelihood(y)

        # create a function to compute the mistakes that are made by the model
        test_model = theano.function(
            [index],
            layer3.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        validate_model = theano.function(
            [index],
            layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        # create a list of all model parameters to be fit by gradient descent
        params = layer3.params + layer2.params + layer1.params + layer0.params

        # create a list of gradients for all model parameters
        grads = T.grad(cost, params)

        # train_model is a function that updates the model parameters by SGD.
        # Since this model has many parameters, it would be tedious to manually
        # create an update rule for each model parameter. We thus create the
        # updates list by automatically looping over all (params[i], grads[i]) pairs.
        updates = [
            (param_i, param_i - learning_rate * grad_i)
            for param_i, grad_i in zip(params, grads)
        ]

        """
        Total (weight) parameters, ignoring biases:
        >>> 20 * 64 + 1000 * 25 + 50 * 36 * 36 * 100 + 100 * 2
        6506480
        """

        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )
        # end-snippet-1

        ###############
        # TRAIN MODEL #
        ###############
        print '... training'
        # early-stopping parameters
        patience = 10000  # look at this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        validation_frequency = min(n_train_batches, patience / 2)
                                       # go through this many minibatches before
                                       # checking the network on the validation
                                       # set; in this case we check every epoch

        best_validation_loss = np.inf
        best_iter = 0
        test_score = 0.
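
        # How the early stopping above plays out (an illustrative sketch with
        # assumed numbers, not taken from the source): with batch_size=300 and,
        # say, 6000 training rows, n_train_batches = 20, so
        # validation_frequency = min(20, 10000 / 2) = 20 and the model is
        # validated once per epoch.  Each sufficiently improved validation loss
        # (below 0.995 * best so far) pushes patience up to
        # iter * patience_increase; the loop below stops as soon as iter
        # reaches patience without such an improvement.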
        start_time = time.clock()

        epoch = 0
        done_looping = False

        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):

                iter = (epoch - 1) * n_train_batches + minibatch_index

                # if iter % 100 == 0:
                print 'training @ iter = ', iter
                cost_ij = train_model(minibatch_index)

                if (iter + 1) % validation_frequency == 0:

                    # compute zero-one loss on validation set
                    validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                    this_validation_loss = np.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * \
                                improvement_threshold:
                            patience = max(patience, iter * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter

                        # test it on the test set
                        test_losses = [
                            test_model(i)
                            for i in xrange(n_test_batches)
                        ]
                        test_score = np.mean(test_losses)
                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))

                if patience <= iter:
                    done_looping = True
                    break

        end_time = time.clock()
        print('Optimization complete.')
        print('Best validation score of %f %% obtained at iteration %i, '
              'with test performance %f %%' %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
        print >> sys.stderr, ('The code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))

    def train(self, X, Y):
        if self.toolset == 'cnn':
            return self._train_cnn(X, Y)
        else:
            raise Exception("Unknown toolset!")

    def predict(self, feat, model=None):
        if self.toolset == 'cnn':
            return self._predict_cnn(feat, model)
        else:
            raise Exception("Unknown toolset!")

    def test(self, X, Y, model=None):
        if self.toolset == 'cnn':
            return self._test_cnn(X, Y, model)
        else:
            raise Exception("Unknown toolset!")
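

# A minimal usage sketch (illustrative only; the shapes are assumptions made
# to match the sizes hard-coded in _train_cnn: each row of X is a rasterized
# 304x304 image and Y holds binary integer labels).  The relative imports
# above mean this module must be imported as part of its package rather than
# run directly, so the example is left as a comment:
#
#     model = ModelTHEANO(toolset='cnn')
#     model.train(X, Y)      # builds and trains the CNN via _train_cnn
#     # model.predict(feat)  # would require _predict_cnn, which is not defined in this file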