__author__ = 'chunk'

from ...mfeat import *
from ...mmodel import *
from ...mspark import SC
from ...common import *
from .theanoutil import *

import os
import sys
import time
import gzip
import cPickle

import numpy as np
from sklearn import cross_validation

import theano
import theano.tensor as T
from theano import function, config, shared, sandbox

package_dir = os.path.dirname(os.path.abspath(__file__))


class ModelTHEANO(ModelBase):
    """
    Some notes:

    1. Error allocating 1411344000 bytes of device memory (out of memory).
       Driver report 203563008 bytes free and 3220897792 bytes total.

       This scenario arises when an operation requires allocation of a large
       contiguous block of memory but no blocks of sufficient size are
       available. GPUs do not have virtual memory, so all allocations must be
       assigned to a contiguous memory region. CPUs do not have this
       limitation because of their support for virtual memory. Multiple
       allocations on a GPU can result in memory fragmentation, which can
       make it more difficult to find contiguous regions of sufficient size
       during subsequent memory allocations.
    """

    def __init__(self, toolset='cnn', sc=None):
        ModelBase.__init__(self)
        self.toolset = toolset
        self.sparker = sc
        self.model = None

    def _train_cnn(self, X=None, Y=None,
                   dataset=os.path.join(package_dir, '../../res/', 'mnist.pkl.gz'),
                   learning_rate=0.4, n_epochs=200, nkerns=[20, 50], batch_size=300):
        # return example_cnn_ilscrop(X, Y, dataset=dataset, learning_rate=learning_rate,
        #                            n_epochs=n_epochs, nkerns=nkerns, batch_size=batch_size)

        print "* dataset:", dataset
        print "* learn-rate:", learning_rate
        print "* n_epochs:", n_epochs
        print "* nkerns:", nkerns
        print "* batch_size:", batch_size

        if X is not None and Y is not None:
            print "* data_shape:", len(X), len(Y)

        X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)

        train_set_x, train_set_y = shared_dataset((X_train, Y_train))
        valid_set_x, valid_set_y = shared_dataset((X_train[:1200], Y_train[:1200]))
        test_set_x, test_set_y = shared_dataset((X_test, Y_test))

        # compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_train_batches /= batch_size
        n_valid_batches /= batch_size
        n_test_batches /= batch_size

        print train_set_x.get_value(borrow=True).shape, train_set_y.shape

        rng = np.random.RandomState(12306)

        index = T.lscalar()  # index to a [mini]batch

        # start-snippet-1
        x = T.matrix('x')    # the data is presented as rasterized images
        y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        print '... building the model'
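
        # Rough memory arithmetic for the shapes used below (an illustrative
        # sketch, not part of the original training path; assumes float32
        # activations and the default nkerns=[20, 50], batch_size=300).  The
        # layer-0 convolution alone produces a (batch_size, nkerns[0], 297, 297)
        # tensor before pooling:
        #     300 * 20 * 297 * 297 * 4 bytes ~= 2.1 GB
        # which is why the out-of-memory note in the class docstring is easy to
        # hit on a GPU with roughly 3.2 GB of total device memory.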
        layer0_input = x.reshape((batch_size, 1, 304, 304))

        # Construct the first convolutional pooling layer:
        # filtering reduces the image size to (304 - 8 + 1, 304 - 8 + 1) = (297, 297)
        # maxpooling reduces this further to (297 / 2, 297 / 2) = (148, 148)
        # 4D output tensor is thus of shape (batch_size, nkerns[0], 148, 148)
        layer0 = ConvPoolLayer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 1, 304, 304),
            filter_shape=(nkerns[0], 1, 8, 8),
            poolsize=(2, 2)
        )

        # Construct the second convolutional pooling layer:
        # filtering reduces the image size to (148 - 5 + 1, 148 - 5 + 1) = (144, 144)
        # maxpooling reduces this further to (144 / 4, 144 / 4) = (36, 36)
        # 4D output tensor is thus of shape (batch_size, nkerns[1], 36, 36)
        layer1 = ConvPoolLayer(
            rng,
            input=layer0.output,
            image_shape=(batch_size, nkerns[0], 148, 148),
            filter_shape=(nkerns[1], nkerns[0], 5, 5),
            poolsize=(4, 4)
        )

        # the HiddenLayer being fully-connected, it operates on 2D matrices of
        # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
        # This will generate a matrix of shape (batch_size, nkerns[1] * 36 * 36),
        # or (300, 50 * 36 * 36) = (300, 64800) with the default values.
        layer2_input = layer1.output.flatten(2)

        # construct a fully-connected sigmoidal layer
        layer2 = HiddenLayer(
            rng,
            input=layer2_input,
            n_in=nkerns[1] * 36 * 36,
            n_out=100,
            activation=T.tanh
        )

        # classify the values of the fully-connected sigmoidal layer
        layer3 = LogisticRegression(input=layer2.output, n_in=100, n_out=2)

        # the cost we minimize during training is the NLL of the model
        cost = layer3.negative_log_likelihood(y)

        # create a function to compute the mistakes that are made by the model
        test_model = theano.function(
            [index],
            layer3.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        validate_model = theano.function(
            [index],
            layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )

        # create a list of all model parameters to be fit by gradient descent
        params = layer3.params + layer2.params + layer1.params + layer0.params

        # create a list of gradients for all model parameters
        grads = T.grad(cost, params)

        # train_model is a function that updates the model parameters by SGD.
        # Since this model has many parameters, it would be tedious to manually
        # create an update rule for each model parameter. We thus create the
        # updates list by automatically looping over all (params[i], grads[i]) pairs.
        updates = [
            (param_i, param_i - learning_rate * grad_i)
            for param_i, grad_i in zip(params, grads)
        ]

        """
        Total (weight) parameters, ignoring biases:
        >>> 20 * 64 + 1000 * 25 + 50 * 36 * 36 * 100 + 100 * 2
        6506480
        """

        train_model = theano.function(
            [index],
            cost,
            updates=updates,
            givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size]
            }
        )
        # end-snippet-1

        ###############
        # TRAIN MODEL #
        ###############
        print '... training'
        # early-stopping parameters
        patience = 10000  # look at this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        validation_frequency = min(n_train_batches, patience / 2)
                                       # go through this many minibatches before
                                       # checking the network on the validation
                                       # set; in this case we check every epoch

        best_validation_loss = np.inf
        best_iter = 0
        test_score = 0.
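
        # How the early stopping above plays out (an illustrative sketch with
        # assumed numbers, not taken from the source): with batch_size=300 and,
        # say, 6000 training rows, n_train_batches = 20, so
        # validation_frequency = min(20, 10000 / 2) = 20 and the model is
        # validated once per epoch.  Each sufficiently improved validation loss
        # (below 0.995 * best so far) pushes patience up to
        # iter * patience_increase; the loop below stops as soon as iter
        # reaches patience without such an improvement.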
        start_time = time.clock()

        epoch = 0
        done_looping = False

        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):

                iter = (epoch - 1) * n_train_batches + minibatch_index

                # if iter % 100 == 0:
                print 'training @ iter = ', iter
                cost_ij = train_model(minibatch_index)

                if (iter + 1) % validation_frequency == 0:

                    # compute zero-one loss on validation set
                    validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
                    this_validation_loss = np.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * \
                                improvement_threshold:
                            patience = max(patience, iter * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter

                        # test it on the test set
                        test_losses = [
                            test_model(i)
                            for i in xrange(n_test_batches)
                        ]
                        test_score = np.mean(test_losses)
                        print(('     epoch %i, minibatch %i/%i, test error of '
                               'best model %f %%') %
                              (epoch, minibatch_index + 1, n_train_batches,
                               test_score * 100.))

                if patience <= iter:
                    done_looping = True
                    break

        end_time = time.clock()
        print('Optimization complete.')
        print('Best validation score of %f %% obtained at iteration %i, '
              'with test performance %f %%' %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
        print >> sys.stderr, ('The code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))

    def train(self, X, Y):
        if self.toolset == 'cnn':
            return self._train_cnn(X, Y)
        else:
            raise Exception("Unknown toolset!")

    def predict(self, feat, model=None):
        if self.toolset == 'cnn':
            return self._predict_cnn(feat, model)
        else:
            raise Exception("Unknown toolset!")

    def test(self, X, Y, model=None):
        if self.toolset == 'cnn':
            return self._test_cnn(X, Y, model)
        else:
            raise Exception("Unknown toolset!")
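

# A minimal usage sketch (illustrative only; the shapes are assumptions made
# to match the sizes hard-coded in _train_cnn: each row of X is a rasterized
# 304x304 image and Y holds binary integer labels).  The relative imports
# above mean this module must be imported as part of its package rather than
# run directly, so the example is left as a comment:
#
#     model = ModelTHEANO(toolset='cnn')
#     model.train(X, Y)      # builds and trains the CNN via _train_cnn
#     # model.predict(feat)  # would require _predict_cnn, which is not defined in this file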