Commit 3ef6ddf1034ad7bb6f5715622ca9c231af3f3084
1 parent: b2b2636c
Exists in master and in 1 other branch (staged)
Showing 2 changed files with 222 additions and 182 deletions
 
mmodel/theano/THEANO.py
| ... | ... | @@ -30,6 +30,7 @@ class ModelTHEANO(ModelBase): | 
| 30 | 30 | |
| 31 | 31 | |
| 32 | 32 | """ | 
| 33 | + | |
| 33 | 34 | def __init__(self, toolset='cnn', sc=None): | 
| 34 | 35 | ModelBase.__init__(self) | 
| 35 | 36 | self.toolset = toolset | 
| ... | ... | @@ -66,188 +67,8 @@ class ModelTHEANO(ModelBase): | 
| 66 | 67 | nkerns=[20, 50, 50], | 
| 67 | 68 | batch_size=400): | 
| 68 | 69 | |
| 69 | - if X == None: | |
| 70 | - assert dataset != None | |
| 71 | - with open(dataset, 'rb') as f: | |
| 72 | - train_set, test_set = cPickle.load(f) | |
| 73 | - | |
| 74 | - X_train, Y_train = train_set | |
| 75 | - X_test, Y_test = test_set | |
| 76 | - else: | |
| 77 | - X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0) | |
| 78 | - | |
| 79 | - X_train, Y_train = self._shared_dataset((X_train, Y_train)) | |
| 80 | - X_test, Y_test = self._shared_dataset((X_test, Y_test)) | |
| 81 | - | |
| 82 | - # X_train = theano.shared(np.asarray(X_train, dtype=theano.config.floatX), borrow=True) | |
| 83 | - # Y_train = theano.shared(np.asarray(Y_train, dtype=theano.config.floatX), borrow=True) | |
| 84 | - # X_test = theano.shared(np.asarray(X_test, dtype=theano.config.floatX), borrow=True) | |
| 85 | - # Y_test = theano.shared(np.asarray(Y_test, dtype=theano.config.floatX), borrow=True) | |
| 86 | - | |
| 87 | - n_train_batches = X_train.get_value(borrow=True).shape[0] / batch_size | |
| 88 | - n_test_batches = X_test.get_value(borrow=True).shape[0] / batch_size | |
| 89 | - | |
| 90 | - print X_train.get_value(borrow=True).shape, Y_train.shape | |
| 91 | - | |
| 92 | - rng = np.random.RandomState(12306) | |
| 93 | - index = T.lscalar() | |
| 94 | - x = T.matrix('x') | |
| 95 | - y = T.ivector('y') | |
| 96 | - | |
| 97 | - ###################### | |
| 98 | - # BUILD ACTUAL MODEL # | |
| 99 | - ###################### | |
| 100 | - print '... building the model' | |
| 101 | - | |
| 102 | - layer0_input = x.reshape((batch_size, 1, 304, 304)) | |
| 103 | - | |
| 104 | - # Construct the first convolutional pooling layer: | |
| 105 | - # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297) | |
| 106 | - # maxpooling reduces this further to (297/4, 297/4) = (74, 74) | |
| 107 | - # 4D output tensor is thus of shape (batch_size, nkerns[0], 74, 74) | |
| 108 | - layer0 = ConvPoolLayer( | |
| 109 | - rng, | |
| 110 | - input=layer0_input, | |
| 111 | - image_shape=(batch_size, 1, 304, 304), | |
| 112 | - filter_shape=(nkerns[0], 1, 8, 8), | |
| 113 | - poolsize=(4, 4) | |
| 114 | - ) | |
| 115 | - | |
| 116 | - # Construct the second convolutional pooling layer | |
| 117 | - # filtering reduces the image size to (74-8+1, 74-8+1) = (67, 67) | |
| 118 | - # maxpooling reduces this further to (67/4, 67/4) = (16, 16) | |
| 119 | - # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 16) | |
| 120 | - layer1 = ConvPoolLayer( | |
| 121 | - rng, | |
| 122 | - input=layer0.output, | |
| 123 | - image_shape=(batch_size, nkerns[0], 74, 74), | |
| 124 | - filter_shape=(nkerns[1], nkerns[0], 8, 8), | |
| 125 | - poolsize=(4, 4) | |
| 126 | - ) | |
| 127 | - | |
| 128 | - # Construct the third convolutional pooling layer | |
| 129 | - # filtering reduces the image size to (16-5+1, 16-5+1) = (12, 12) | |
| 130 | - # maxpooling reduces this further to (12/3, 12/3) = (4, 4) | |
| 131 | - # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4) | |
| 132 | - layer2 = ConvPoolLayer( | |
| 133 | - rng, | |
| 134 | - input=layer1.output, | |
| 135 | - image_shape=(batch_size, nkerns[1], 16, 16), | |
| 136 | - filter_shape=(nkerns[2], nkerns[1], 5, 5), | |
| 137 | - poolsize=(3, 3) | |
| 138 | - ) | |
| 139 | - | |
| 140 | - # the HiddenLayer being fully-connected, it operates on 2D matrices of | |
| 141 | - # shape (batch_size, num_pixels) (i.e matrix of rasterized images). | |
| 142 | - # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4), | |
| 143 | - # or (500, 50 * 4 * 4) = (500, 800) with the default values. | |
| 144 | - layer3_input = layer2.output.flatten(2) | |
| 145 | - # construct a fully-connected sigmoidal layer | |
| 146 | - layer3 = HiddenLayer( | |
| 147 | - rng, | |
| 148 | - input=layer3_input, | |
| 149 | - n_in=nkerns[2] * 4 * 4, | |
| 150 | - n_out=500, | |
| 151 | - activation=T.tanh | |
| 152 | - ) | |
| 153 | - # classify the values of the fully-connected sigmoidal layer | |
| 154 | - layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=2) | |
| 155 | - | |
| 156 | - # the cost we minimize during training is the NLL of the model | |
| 157 | - cost = layer4.negative_log_likelihood(y) | |
| 158 | - params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params | |
| 159 | - grads = T.grad(cost, params) | |
| 160 | - updates = [ | |
| 161 | - (param_i, param_i - learning_rate * grad_i) | |
| 162 | - for param_i, grad_i in zip(params, grads) | |
| 163 | - ] | |
| 164 | - | |
| 165 | - train_model = theano.function( | |
| 166 | - [index], | |
| 167 | - cost, | |
| 168 | - updates=updates, | |
| 169 | - givens={ | |
| 170 | - x: X_train[index * batch_size: (index + 1) * batch_size], | |
| 171 | - y: Y_train[index * batch_size: (index + 1) * batch_size] | |
| 172 | - } | |
| 173 | - ) | |
| 174 | - | |
| 175 | - test_model = theano.function( | |
| 176 | - [index], | |
| 177 | - layer4.errors(y), | |
| 178 | - givens={ | |
| 179 | - x: X_test[index * batch_size: (index + 1) * batch_size], | |
| 180 | - y: Y_test[index * batch_size: (index + 1) * batch_size] | |
| 181 | - } | |
| 182 | - ) | |
| 183 | - | |
| 184 | - ############### | |
| 185 | - # TRAIN MODEL # | |
| 186 | - ############### | |
| 187 | - print '... training' | |
| 188 | - # early-stopping parameters | |
| 189 | - patience = 10000 # look as this many examples regardless | |
| 190 | - patience_increase = 2 # wait this much longer when a new best is found | |
| 191 | - improvement_threshold = 0.995 # a relative improvement of this much is | |
| 192 | - # considered significant | |
| 193 | - validation_frequency = min(n_train_batches, patience / 2) | |
| 194 | - # go through this many | |
| 195 | - # minibatche before checking the network | |
| 196 | - # on the validation set; in this case we | |
| 197 | - # check every epoch | |
| 198 | - | |
| 199 | - best_validation_loss = np.inf | |
| 200 | - best_iter = 0 | |
| 201 | - test_score = 0. | |
| 202 | - start_time = time.clock() | |
| 203 | - | |
| 204 | - epoch = 0 | |
| 205 | - done_looping = False | |
| 206 | - | |
| 207 | - while (epoch < n_epochs) and (not done_looping): | |
| 208 | - epoch = epoch + 1 | |
| 209 | - for minibatch_index in xrange(n_train_batches): | |
| 210 | - | |
| 211 | - iter = (epoch - 1) * n_train_batches + minibatch_index | |
| 212 | - | |
| 213 | - # if iter % 100 == 0: | |
| 214 | - # print 'training @ iter = ', iter | |
| 215 | - print 'training @ iter = ', iter | |
| 216 | - cost_ij = train_model(minibatch_index) | |
| 217 | - | |
| 218 | - if (iter + 1) % validation_frequency == 0: | |
| 219 | - | |
| 220 | - # compute zero-one loss on validation set | |
| 221 | - validation_losses = [test_model(i) for i in xrange(n_test_batches)] | |
| 222 | - this_validation_loss = np.mean(validation_losses) | |
| 223 | - print('epoch %i, minibatch %i/%i, validation error %f %%' % | |
| 224 | - (epoch, minibatch_index + 1, n_train_batches, | |
| 225 | - this_validation_loss * 100.)) | |
| 226 | - | |
| 227 | - # if we got the best validation score until now | |
| 228 | - if this_validation_loss < best_validation_loss: | |
| 229 | - | |
| 230 | - # improve patience if loss improvement is good enough | |
| 231 | - if this_validation_loss < best_validation_loss * \ | |
| 232 | - improvement_threshold: | |
| 233 | - patience = max(patience, iter * patience_increase) | |
| 234 | - | |
| 235 | - # save best validation score and iteration number | |
| 236 | - best_validation_loss = this_validation_loss | |
| 237 | - best_iter = iter | |
| 238 | - | |
| 239 | - if patience <= iter: | |
| 240 | - done_looping = True | |
| 241 | - break | |
| 242 | - | |
| 243 | - end_time = time.clock() | |
| 244 | - print('Optimization complete.') | |
| 245 | - print('Best validation score of %f %% obtained at iteration %i, ' | |
| 246 | - 'with test performance %f %%' % | |
| 247 | - (best_validation_loss * 100., best_iter + 1, test_score * 100.)) | |
| 248 | - print >> sys.stderr, ('The code for file ' + | |
| 249 | - os.path.split(__file__)[1] + | |
| 250 | - ' ran for %.2fm' % ((end_time - start_time) / 60.)) | |
| 70 | + return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns, | |
| 71 | + batch_size=batch_size) | |
| 251 | 72 | |
| 252 | 73 | |
| 253 | 74 | def train(self, X, Y): | 
| ... | ... | |
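Taken together, this hunk replaces the inline CNN training code in `ModelTHEANO._train_cnn` with a single call to the module-level `train_cnn_example` helper added to `theanoutil.py` below; the code itself is essentially moved unchanged. A minimal usage sketch, assuming the repository root is on the Python path so that `mmodel.theano` imports as a package (illustrative only, not part of the commit):

```python
# Illustrative sketch, not part of the commit. Assumes mmodel.theano is importable.
from mmodel.theano.THEANO import ModelTHEANO

model = ModelTHEANO(toolset='cnn')

# After this commit the call below simply forwards its arguments to
# theanoutil.train_cnn_example(X, Y, dataset=..., learning_rate=0.1,
# n_epochs=200, nkerns=[20, 50, 50], batch_size=400).
# model._train_cnn(X, Y)
```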
mmodel/theano/theanoutil.py
| ... | ... | @@ -4,12 +4,16 @@ import os, sys | 
| 4 | 4 | import time | 
| 5 | 5 | |
| 6 | 6 | import numpy as np | 
| 7 | +from sklearn import cross_validation | |
| 7 | 8 | |
| 8 | 9 | import theano | 
| 9 | 10 | import theano.tensor as T | 
| 10 | 11 | from theano.tensor.signal import downsample | 
| 11 | 12 | from theano.tensor.nnet import conv | 
| 12 | 13 | |
| 14 | +import cPickle | |
| 15 | + | |
| 16 | + | |
| 13 | 17 | class LogisticRegression(object): | 
| 14 | 18 | """ | 
| 15 | 19 | Multi-class Logistic Regression Class | 
| ... | ... | @@ -164,8 +168,223 @@ class ConvPoolLayer(object): | 
| 164 | 168 | self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')) | 
| 165 | 169 | self.params = [self.W, self.b] | 
| 166 | 170 | |
| 171 | +def _shared_dataset(data_xy, borrow=True): | |
| 172 | + """ Function that loads the dataset into shared variables | |
| 167 | 173 | |
| 174 | + The reason we store our dataset in shared variables is to allow | |
| 175 | + Theano to copy it into the GPU memory (when code is run on GPU). | |
| 176 | + Since copying data into the GPU is slow, copying a minibatch everytime | |
| 177 | + is needed (the default behaviour if the data is not in a shared | |
| 178 | + variable) would lead to a large decrease in performance. | |
| 179 | + """ | |
| 180 | + data_x, data_y = data_xy | |
| 181 | + shared_x = theano.shared(np.asarray(data_x, | |
| 182 | + dtype=theano.config.floatX), | |
| 183 | + borrow=borrow) | |
| 184 | + shared_y = theano.shared(np.asarray(data_y, | |
| 185 | + dtype=theano.config.floatX), | |
| 186 | + borrow=borrow) | |
| 187 | + # When storing data on the GPU it has to be stored as floats | |
| 188 | + # therefore we will store the labels as ``floatX`` as well | |
| 189 | + # (``shared_y`` does exactly that). But during our computations | |
| 190 | + # we need them as ints (we use labels as index, and if they are | |
| 191 | + # floats it doesn't make sense) therefore instead of returning | |
| 192 | + # ``shared_y`` we will have to cast it to int. This little hack | |
| 193 | + # lets ous get around this issue | |
| 194 | + return shared_x, T.cast(shared_y, 'int32') | |
| 195 | + | |
| 196 | +def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'), | |
| 197 | + learning_rate=0.1, n_epochs=200, | |
| 198 | + nkerns=[20, 50, 50], | |
| 199 | + batch_size=400): | |
| 200 | + | |
| 201 | + if X == None: | |
| 202 | + assert dataset != None | |
| 203 | + with open(dataset, 'rb') as f: | |
| 204 | + train_set, test_set = cPickle.load(f) | |
| 205 | + | |
| 206 | + X_train, Y_train = train_set | |
| 207 | + X_test, Y_test = test_set | |
| 208 | + else: | |
| 209 | + X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0) | |
| 210 | + | |
| 211 | + X_train, Y_train = _shared_dataset((X_train, Y_train)) | |
| 212 | + X_test, Y_test = _shared_dataset((X_test, Y_test)) | |
| 213 | + | |
| 214 | + # X_train = theano.shared(np.asarray(X_train, dtype=theano.config.floatX), borrow=True) | |
| 215 | + # Y_train = theano.shared(np.asarray(Y_train, dtype=theano.config.floatX), borrow=True) | |
| 216 | + # X_test = theano.shared(np.asarray(X_test, dtype=theano.config.floatX), borrow=True) | |
| 217 | + # Y_test = theano.shared(np.asarray(Y_test, dtype=theano.config.floatX), borrow=True) | |
| 218 | + | |
| 219 | + n_train_batches = X_train.get_value(borrow=True).shape[0] / batch_size | |
| 220 | + n_test_batches = X_test.get_value(borrow=True).shape[0] / batch_size | |
| 221 | + | |
| 222 | + print X_train.get_value(borrow=True).shape, Y_train.shape | |
| 223 | + | |
| 224 | + rng = np.random.RandomState(12306) | |
| 225 | + index = T.lscalar() | |
| 226 | + x = T.matrix('x') | |
| 227 | + y = T.ivector('y') | |
| 228 | + | |
| 229 | + ###################### | |
| 230 | + # BUILD ACTUAL MODEL # | |
| 231 | + ###################### | |
| 232 | + print '... building the model' | |
| 233 | + | |
| 234 | + layer0_input = x.reshape((batch_size, 1, 304, 304)) | |
| 235 | + | |
| 236 | + # Construct the first convolutional pooling layer: | |
| 237 | + # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297) | |
| 238 | + # maxpooling reduces this further to (297/4, 297/4) = (74, 74) | |
| 239 | + # 4D output tensor is thus of shape (batch_size, nkerns[0], 74, 74) | |
| 240 | + layer0 = ConvPoolLayer( | |
| 241 | + rng, | |
| 242 | + input=layer0_input, | |
| 243 | + image_shape=(batch_size, 1, 304, 304), | |
| 244 | + filter_shape=(nkerns[0], 1, 8, 8), | |
| 245 | + poolsize=(4, 4) | |
| 246 | + ) | |
| 247 | + | |
| 248 | + # Construct the second convolutional pooling layer | |
| 249 | + # filtering reduces the image size to (74-8+1, 74-8+1) = (67, 67) | |
| 250 | + # maxpooling reduces this further to (67/4, 67/4) = (16, 16) | |
| 251 | + # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 16) | |
| 252 | + layer1 = ConvPoolLayer( | |
| 253 | + rng, | |
| 254 | + input=layer0.output, | |
| 255 | + image_shape=(batch_size, nkerns[0], 74, 74), | |
| 256 | + filter_shape=(nkerns[1], nkerns[0], 8, 8), | |
| 257 | + poolsize=(4, 4) | |
| 258 | + ) | |
| 259 | + | |
| 260 | + # Construct the third convolutional pooling layer | |
| 261 | + # filtering reduces the image size to (16-5+1, 16-5+1) = (12, 12) | |
| 262 | + # maxpooling reduces this further to (12/3, 12/3) = (4, 4) | |
| 263 | + # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4) | |
| 264 | + layer2 = ConvPoolLayer( | |
| 265 | + rng, | |
| 266 | + input=layer1.output, | |
| 267 | + image_shape=(batch_size, nkerns[1], 16, 16), | |
| 268 | + filter_shape=(nkerns[2], nkerns[1], 5, 5), | |
| 269 | + poolsize=(3, 3) | |
| 270 | + ) | |
| 271 | + | |
| 272 | + # the HiddenLayer being fully-connected, it operates on 2D matrices of | |
| 273 | + # shape (batch_size, num_pixels) (i.e matrix of rasterized images). | |
| 274 | + # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4), | |
| 275 | + # or (500, 50 * 4 * 4) = (500, 800) with the default values. | |
| 276 | + layer3_input = layer2.output.flatten(2) | |
| 277 | + # construct a fully-connected sigmoidal layer | |
| 278 | + layer3 = HiddenLayer( | |
| 279 | + rng, | |
| 280 | + input=layer3_input, | |
| 281 | + n_in=nkerns[2] * 4 * 4, | |
| 282 | + n_out=500, | |
| 283 | + activation=T.tanh | |
| 284 | + ) | |
| 285 | + # classify the values of the fully-connected sigmoidal layer | |
| 286 | + layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=2) | |
| 287 | + | |
| 288 | + # the cost we minimize during training is the NLL of the model | |
| 289 | + cost = layer4.negative_log_likelihood(y) | |
| 290 | + params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params | |
| 291 | + grads = T.grad(cost, params) | |
| 292 | + updates = [ | |
| 293 | + (param_i, param_i - learning_rate * grad_i) | |
| 294 | + for param_i, grad_i in zip(params, grads) | |
| 295 | + ] | |
| 168 | 296 | |
| 297 | + """ | |
| 298 | + Total Parameters: | |
| 299 | + >>> 20 * 64 + 1000 * 64 + 2500 * 25 + 50 * 16 * 500 + 500 * 2 | |
| 300 | + 528780 | |
| 301 | + """ | |
| 302 | + train_model = theano.function( | |
| 303 | + [index], | |
| 304 | + cost, | |
| 305 | + updates=updates, | |
| 306 | + givens={ | |
| 307 | + x: X_train[index * batch_size: (index + 1) * batch_size], | |
| 308 | + y: Y_train[index * batch_size: (index + 1) * batch_size] | |
| 309 | + } | |
| 310 | + ) | |
| 311 | + | |
| 312 | + test_model = theano.function( | |
| 313 | + [index], | |
| 314 | + layer4.errors(y), | |
| 315 | + givens={ | |
| 316 | + x: X_test[index * batch_size: (index + 1) * batch_size], | |
| 317 | + y: Y_test[index * batch_size: (index + 1) * batch_size] | |
| 318 | + } | |
| 319 | + ) | |
| 320 | + | |
| 321 | + ############### | |
| 322 | + # TRAIN MODEL # | |
| 323 | + ############### | |
| 324 | + print '... training' | |
| 325 | + # early-stopping parameters | |
| 326 | + patience = 10000 # look as this many examples regardless | |
| 327 | + patience_increase = 2 # wait this much longer when a new best is found | |
| 328 | + improvement_threshold = 0.995 # a relative improvement of this much is | |
| 329 | + # considered significant | |
| 330 | + validation_frequency = min(n_train_batches, patience / 2) | |
| 331 | + # go through this many | |
| 332 | + # minibatche before checking the network | |
| 333 | + # on the validation set; in this case we | |
| 334 | + # check every epoch | |
| 335 | + | |
| 336 | + best_validation_loss = np.inf | |
| 337 | + best_iter = 0 | |
| 338 | + test_score = 0. | |
| 339 | + start_time = time.clock() | |
| 340 | + | |
| 341 | + epoch = 0 | |
| 342 | + done_looping = False | |
| 343 | + | |
| 344 | + while (epoch < n_epochs) and (not done_looping): | |
| 345 | + epoch = epoch + 1 | |
| 346 | + for minibatch_index in xrange(n_train_batches): | |
| 347 | + | |
| 348 | + iter = (epoch - 1) * n_train_batches + minibatch_index | |
| 349 | + | |
| 350 | + # if iter % 100 == 0: | |
| 351 | + # print 'training @ iter = ', iter | |
| 352 | + print 'training @ iter = ', iter | |
| 353 | + cost_ij = train_model(minibatch_index) | |
| 354 | + | |
| 355 | + if (iter + 1) % validation_frequency == 0: | |
| 356 | + | |
| 357 | + # compute zero-one loss on validation set | |
| 358 | + validation_losses = [test_model(i) for i in xrange(n_test_batches)] | |
| 359 | + this_validation_loss = np.mean(validation_losses) | |
| 360 | + print('epoch %i, minibatch %i/%i, validation error %f %%' % | |
| 361 | + (epoch, minibatch_index + 1, n_train_batches, | |
| 362 | + this_validation_loss * 100.)) | |
| 363 | + | |
| 364 | + # if we got the best validation score until now | |
| 365 | + if this_validation_loss < best_validation_loss: | |
| 366 | + | |
| 367 | + # improve patience if loss improvement is good enough | |
| 368 | + if this_validation_loss < best_validation_loss * \ | |
| 369 | + improvement_threshold: | |
| 370 | + patience = max(patience, iter * patience_increase) | |
| 371 | + | |
| 372 | + # save best validation score and iteration number | |
| 373 | + best_validation_loss = this_validation_loss | |
| 374 | + best_iter = iter | |
| 375 | + | |
| 376 | + if patience <= iter: | |
| 377 | + done_looping = True | |
| 378 | + break | |
| 379 | + | |
| 380 | + end_time = time.clock() | |
| 381 | + print('Optimization complete.') | |
| 382 | + print('Best validation score of %f %% obtained at iteration %i, ' | |
| 383 | + 'with test performance %f %%' % | |
| 384 | + (best_validation_loss * 100., best_iter + 1, test_score * 100.)) | |
| 385 | + print >> sys.stderr, ('The code for file ' + | |
| 386 | + os.path.split(__file__)[1] + | |
| 387 | + ' ran for %.2fm' % ((end_time - start_time) / 60.)) | |
| 169 | 388 | |
| 170 | 389 | |
| 171 | 390 | |
| ... | ... | |
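The layer-shape comments and the new "Total Parameters" docstring in `train_cnn_example` can be checked with plain arithmetic: each `ConvPoolLayer` applies a valid convolution (output = input − filter + 1) followed by non-overlapping max-pooling. A small standalone sketch, not part of the commit, that reproduces those numbers (it counts weights only, matching the docstring, and ignores biases):

```python
# Standalone sanity check (not part of the commit) for the layer-size comments
# and the "Total Parameters" docstring in train_cnn_example.

def conv_pool_out(size, filt, pool):
    # valid convolution followed by non-overlapping max-pooling
    return (size - filt + 1) // pool

s0 = conv_pool_out(304, 8, 4)            # (304-8+1)//4 = 74
s1 = conv_pool_out(s0, 8, 4)             # (74-8+1)//4  = 16
s2 = conv_pool_out(s1, 5, 3)             # (16-5+1)//3  = 4
assert (s0, s1, s2) == (74, 16, 4)

nkerns = [20, 50, 50]
flattened = nkerns[2] * s2 * s2          # 50*4*4 = 800 inputs to the hidden layer

weights = (nkerns[0] * 1 * 8 * 8                 # 20*64       conv layer 0
           + nkerns[1] * nkerns[0] * 8 * 8       # 1000*64     conv layer 1
           + nkerns[2] * nkerns[1] * 5 * 5       # 2500*25     conv layer 2
           + flattened * 500                     # 50*16*500   hidden layer
           + 500 * 2)                            # 500*2       logistic regression
assert weights == 528780
print(weights)
```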