# THEANO.py 8.12 KB
__author__ = 'chunk'

from ...mfeat import *
from ...mmodel import *
from ...mspark import SC
from ...common import *
from .theanoutil import *

import numpy as np
from sklearn import cross_validation

import gzip
import cPickle

# Absolute path of the directory that contains this module; the default
# dataset path of ModelTHEANO._train_cnn is built relative to it.
package_dir = os.path.split(os.path.abspath(__file__))[0]


class ModelTHEANO(ModelBase):
    """Theano-backed model wrapper.

    ``train``/``predict``/``test`` dispatch on ``self.toolset``; ``'cnn'`` is
    the only toolset handled, any other value raises ``Exception``.
    """

    def __init__(self, toolset='cnn', sc=None):
        """Store the configuration; ``self.model`` starts out as ``None``.

        :param toolset: backend selector consulted by train/predict/test.
        :param sc: kept as ``self.sparker`` (presumably a Spark context
            wrapper -- it is not used anywhere in the visible class).
        """
        ModelBase.__init__(self)
        self.toolset = toolset
        self.sparker = sc
        self.model = None

    def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'ils_crop.pkl'),
                   learning_rate=0.1, n_epochs=200,
                   nkerns=(20, 50, 50),
                   batch_size=200):
        """Train a three-stage conv/pool network with plain minibatch SGD.

        The network is hard-wired to single-channel 304x304 inputs and a
        two-class logistic-regression output.

        :param X: samples; anything ``np.asarray`` can turn into an array
            whose rows each hold 304*304 values.  When ``None`` the data is
            read from ``dataset`` instead.
        :param Y: integer class labels, one per sample (required with ``X``).
        :param dataset: path of a pickle holding ``(train_set, test_set)``,
            each an ``(X, Y)`` pair; only read when ``X`` is ``None``.
        :param learning_rate: SGD step size.
        :param n_epochs: maximum number of passes over the training data.
        :param nkerns: number of feature maps of the three conv layers.
        :param batch_size: minibatch size; the symbolic graph is built for
            exactly this size, so trailing partial batches are not used.
        :raises ValueError: on missing inputs, on a split smaller than one
            batch, or on rows that do not contain 304*304 values.
        :return: ``None``.
        """
        image_side = 304

        # `is None` rather than `== None`: comparing a NumPy array with `==`
        # is element-wise and makes the `if` ambiguous.
        if X is None:
            if dataset is None:
                raise ValueError("either X/Y or a dataset path is required")
            # NOTE(security): unpickling can execute arbitrary code -- only
            # point `dataset` at trusted files.
            with open(dataset, 'rb') as f:
                train_set, test_set = cPickle.load(f)

            X_train, Y_train = train_set
            X_test, Y_test = test_set
        else:
            if Y is None:
                raise ValueError("Y is required when X is given")
            X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(
                X, Y, test_size=0.2, random_state=0)

        def as_shared(features, labels, role):
            """Validate one split and wrap it in Theano shared variables."""
            features = np.asarray(features, dtype=theano.config.floatX)
            labels = np.asarray(labels, dtype='int32')  # must match T.ivector
            n_batches = features.shape[0] // batch_size
            if n_batches == 0:
                raise ValueError("%s set has %d samples, fewer than batch_size=%d"
                                 % (role, features.shape[0], batch_size))
            # `x` below is a matrix, so every sample has to be one flat row.
            features = features.reshape((features.shape[0], -1))
            if features.shape[1] != image_side * image_side:
                raise ValueError("%s samples have %d values each, expected %d"
                                 % (role, features.shape[1], image_side * image_side))
            return (theano.shared(features, borrow=True),
                    theano.shared(labels, borrow=True),
                    n_batches)

        # The data must live in shared variables: `givens` below slices it
        # with the symbolic `index`, which a plain ndarray cannot do.
        X_train, Y_train, n_train_batches = as_shared(X_train, Y_train, 'training')
        X_test, Y_test, n_test_batches = as_shared(X_test, Y_test, 'test')

        # Fixed integer seed (RandomState does not accept a string seed).
        rng = np.random.RandomState(23455)
        index = T.lscalar()  # minibatch index
        x = T.matrix('x')    # rasterized images, one per row
        y = T.ivector('y')   # integer labels

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print('... building the model')

        layer0_input = x.reshape((batch_size, 1, image_side, image_side))

        # Construct the first convolutional pooling layer:
        # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297)
        # maxpooling reduces this further to (297/4, 297/4) = (74, 74)
        # 4D output tensor is thus of shape (batch_size, nkerns[0], 74, 74)
        layer0 = ConvPoolLayer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 1, image_side, image_side),
            filter_shape=(nkerns[0], 1, 8, 8),
            poolsize=(4, 4)
        )

        # Construct the second convolutional pooling layer
        # filtering reduces the image size to (74-8+1, 74-8+1) = (67, 67)
        # maxpooling reduces this further to (67/4, 67/4) = (16, 16)
        # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 16)
        layer1 = ConvPoolLayer(
            rng,
            input=layer0.output,
            image_shape=(batch_size, nkerns[0], 74, 74),
            filter_shape=(nkerns[1], nkerns[0], 8, 8),
            poolsize=(4, 4)
        )

        # Construct the third convolutional pooling layer
        # filtering reduces the image size to (16-5+1, 16-5+1) = (12, 12)
        # maxpooling reduces this further to (12/3, 12/3) = (4, 4)
        # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4)
        layer2 = ConvPoolLayer(
            rng,
            input=layer1.output,
            # layer1 emits nkerns[1] feature maps, so that is the channel
            # count here (it has to agree with filter_shape[1]).
            image_shape=(batch_size, nkerns[1], 16, 16),
            filter_shape=(nkerns[2], nkerns[1], 5, 5),
            poolsize=(3, 3)
        )

        # the HiddenLayer being fully-connected, it operates on 2D matrices of
        # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
        # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4),
        # or (200, 50 * 4 * 4) = (200, 800) with the default values.
        layer3_input = layer2.output.flatten(2)
        # construct a fully-connected tanh layer
        layer3 = HiddenLayer(
            rng,
            input=layer3_input,
            n_in=nkerns[2] * 4 * 4,
            n_out=500,
            activation=T.tanh
        )
        # classify the values of the fully-connected layer (two classes)
        layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=2)

        # the cost we minimize during training is the NLL of the model
        cost = layer4.negative_log_likelihood(y)
        params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
        grads = T.grad(cost, params)
        # plain SGD: one (parameter, updated value) pair per parameter
        updates = [
            (param_i, param_i - learning_rate * grad_i)
            for param_i, grad_i in zip(params, grads)
        ]

        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: X_train[index * batch_size: (index + 1) * batch_size],
                y: Y_train[index * batch_size: (index + 1) * batch_size]
            }
        )

        test_model = theano.function(
            [index],
            layer4.errors(y),
            givens={
                x: X_test[index * batch_size: (index + 1) * batch_size],
                y: Y_test[index * batch_size: (index + 1) * batch_size]
            }
        )

        ###############
        # TRAIN MODEL #
        ###############
        print('... training')
        # early-stopping parameters
        patience = 10000  # look at this many minibatches regardless
        patience_increase = 2  # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much is
        # considered significant
        # number of minibatches between two evaluations on the held-out
        # split; with these values that is at most once per epoch
        validation_frequency = min(n_train_batches, patience // 2)

        best_validation_loss = np.inf
        best_iter = 0
        start_time = time.clock()

        epoch = 0
        done_looping = False

        while epoch < n_epochs and not done_looping:
            epoch += 1
            for minibatch_index in xrange(n_train_batches):

                # global minibatch counter across epochs
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if iteration % 100 == 0:
                    print('training @ iter =  %i' % iteration)
                train_model(minibatch_index)

                if (iteration + 1) % validation_frequency == 0:

                    # zero-one loss on the held-out split (it doubles as the
                    # validation set; there is no separate test set here)
                    validation_losses = [test_model(i) for i in xrange(n_test_batches)]
                    this_validation_loss = np.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iteration * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                if patience <= iteration:
                    done_looping = True
                    break

        end_time = time.clock()
        print('Optimization complete.')
        print('Best validation score of %f %% obtained at iteration %i' %
              (best_validation_loss * 100., best_iter + 1))
        sys.stderr.write('The code for file ' +
                         os.path.split(__file__)[1] +
                         ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')
        # TODO(review): the trained layers are neither returned nor stored in
        # self.model -- decide what predict/test are supposed to consume.

    def train(self, X, Y):
        """Train on samples ``X`` with labels ``Y`` using the configured toolset.

        :return: whatever ``_train_cnn`` returns.
        :raises Exception: if ``self.toolset`` is not ``'cnn'``.
        """
        if self.toolset == 'cnn':
            return self._train_cnn(X, Y)
        else:
            raise Exception("Unknown toolset!")

    def predict(self, feat, model=None):
        """Forward ``feat`` and ``model`` to ``self._predict_cnn``.

        NOTE(review): ``_predict_cnn`` is not defined in the visible part of
        this class -- confirm it is provided elsewhere (e.g. by ModelBase).

        :raises Exception: if ``self.toolset`` is not ``'cnn'``.
        """
        if self.toolset == 'cnn':
            return self._predict_cnn(feat, model)
        else:
            raise Exception("Unknown toolset!")

    def test(self, X, Y, model=None):
        """Forward ``X``, ``Y`` and ``model`` to ``self._test_cnn``.

        NOTE(review): ``_test_cnn`` is not defined in the visible part of
        this class -- confirm it is provided elsewhere (e.g. by ModelBase).

        :raises Exception: if ``self.toolset`` is not ``'cnn'``.
        """
        if self.toolset == 'cnn':
            return self._test_cnn(X, Y, model)
        else:
            raise Exception("Unknown toolset!")