Commit 3ef6ddf1034ad7bb6f5715622ca9c231af3f3084
1 parent: b2b2636c
Exists in master and in 1 other branch: staged
Showing 2 changed files with 222 additions and 182 deletions.
mmodel/theano/THEANO.py
@@ -30,6 +30,7 @@ class ModelTHEANO(ModelBase):
 
 
     """
+
     def __init__(self, toolset='cnn', sc=None):
         ModelBase.__init__(self)
         self.toolset = toolset
@@ -66,188 +67,8 @@ class ModelTHEANO(ModelBase):
                     nkerns=[20, 50, 50],
                     batch_size=400):
 
-        if X == None:
-            assert dataset != None
-            with open(dataset, 'rb') as f:
-                train_set, test_set = cPickle.load(f)
-
-            X_train, Y_train = train_set
-            X_test, Y_test = test_set
-        else:
-            X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
-
-        X_train, Y_train = self._shared_dataset((X_train, Y_train))
-        X_test, Y_test = self._shared_dataset((X_test, Y_test))
-
-        # X_train = theano.shared(np.asarray(X_train, dtype=theano.config.floatX), borrow=True)
-        # Y_train = theano.shared(np.asarray(Y_train, dtype=theano.config.floatX), borrow=True)
-        # X_test = theano.shared(np.asarray(X_test, dtype=theano.config.floatX), borrow=True)
-        # Y_test = theano.shared(np.asarray(Y_test, dtype=theano.config.floatX), borrow=True)
-
-        n_train_batches = X_train.get_value(borrow=True).shape[0] / batch_size
-        n_test_batches = X_test.get_value(borrow=True).shape[0] / batch_size
-
-        print X_train.get_value(borrow=True).shape, Y_train.shape
-
-        rng = np.random.RandomState(12306)
-        index = T.lscalar()
-        x = T.matrix('x')
-        y = T.ivector('y')
-
-        ######################
-        # BUILD ACTUAL MODEL #
-        ######################
-        print '... building the model'
-
-        layer0_input = x.reshape((batch_size, 1, 304, 304))
-
-        # Construct the first convolutional pooling layer:
-        # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297)
-        # maxpooling reduces this further to (297/4, 297/4) = (74, 74)
-        # 4D output tensor is thus of shape (batch_size, nkerns[0], 74, 74)
-        layer0 = ConvPoolLayer(
-            rng,
-            input=layer0_input,
-            image_shape=(batch_size, 1, 304, 304),
-            filter_shape=(nkerns[0], 1, 8, 8),
-            poolsize=(4, 4)
-        )
-
-        # Construct the second convolutional pooling layer
-        # filtering reduces the image size to (74-8+1, 74-8+1) = (67, 67)
-        # maxpooling reduces this further to (67/4, 67/4) = (16, 16)
-        # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 16)
-        layer1 = ConvPoolLayer(
-            rng,
-            input=layer0.output,
-            image_shape=(batch_size, nkerns[0], 74, 74),
-            filter_shape=(nkerns[1], nkerns[0], 8, 8),
-            poolsize=(4, 4)
-        )
-
-        # Construct the third convolutional pooling layer
-        # filtering reduces the image size to (16-5+1, 16-5+1) = (12, 12)
-        # maxpooling reduces this further to (12/3, 12/3) = (4, 4)
-        # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4)
-        layer2 = ConvPoolLayer(
-            rng,
-            input=layer1.output,
-            image_shape=(batch_size, nkerns[1], 16, 16),
-            filter_shape=(nkerns[2], nkerns[1], 5, 5),
-            poolsize=(3, 3)
-        )
-
-        # the HiddenLayer being fully-connected, it operates on 2D matrices of
-        # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
-        # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4),
-        # or (500, 50 * 4 * 4) = (500, 800) with the default values.
-        layer3_input = layer2.output.flatten(2)
-        # construct a fully-connected sigmoidal layer
-        layer3 = HiddenLayer(
-            rng,
-            input=layer3_input,
-            n_in=nkerns[2] * 4 * 4,
-            n_out=500,
-            activation=T.tanh
-        )
-        # classify the values of the fully-connected sigmoidal layer
-        layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=2)
-
-        # the cost we minimize during training is the NLL of the model
-        cost = layer4.negative_log_likelihood(y)
-        params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
-        grads = T.grad(cost, params)
-        updates = [
-            (param_i, param_i - learning_rate * grad_i)
-            for param_i, grad_i in zip(params, grads)
-        ]
-
-        train_model = theano.function(
-            [index],
-            cost,
-            updates=updates,
-            givens={
-                x: X_train[index * batch_size: (index + 1) * batch_size],
-                y: Y_train[index * batch_size: (index + 1) * batch_size]
-            }
-        )
-
-        test_model = theano.function(
-            [index],
-            layer4.errors(y),
-            givens={
-                x: X_test[index * batch_size: (index + 1) * batch_size],
-                y: Y_test[index * batch_size: (index + 1) * batch_size]
-            }
-        )
-
-        ###############
-        # TRAIN MODEL #
-        ###############
-        print '... training'
-        # early-stopping parameters
-        patience = 10000  # look as this many examples regardless
-        patience_increase = 2  # wait this much longer when a new best is found
-        improvement_threshold = 0.995  # a relative improvement of this much is
-                                       # considered significant
-        validation_frequency = min(n_train_batches, patience / 2)
-                                       # go through this many
-                                       # minibatche before checking the network
-                                       # on the validation set; in this case we
-                                       # check every epoch
-
-        best_validation_loss = np.inf
-        best_iter = 0
-        test_score = 0.
-        start_time = time.clock()
-
-        epoch = 0
-        done_looping = False
-
-        while (epoch < n_epochs) and (not done_looping):
-            epoch = epoch + 1
-            for minibatch_index in xrange(n_train_batches):
-
-                iter = (epoch - 1) * n_train_batches + minibatch_index
-
-                # if iter % 100 == 0:
-                # print 'training @ iter = ', iter
-                print 'training @ iter = ', iter
-                cost_ij = train_model(minibatch_index)
-
-                if (iter + 1) % validation_frequency == 0:
-
-                    # compute zero-one loss on validation set
-                    validation_losses = [test_model(i) for i in xrange(n_test_batches)]
-                    this_validation_loss = np.mean(validation_losses)
-                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
-                          (epoch, minibatch_index + 1, n_train_batches,
-                           this_validation_loss * 100.))
-
-                    # if we got the best validation score until now
-                    if this_validation_loss < best_validation_loss:
-
-                        # improve patience if loss improvement is good enough
-                        if this_validation_loss < best_validation_loss * \
-                                improvement_threshold:
-                            patience = max(patience, iter * patience_increase)
-
-                        # save best validation score and iteration number
-                        best_validation_loss = this_validation_loss
-                        best_iter = iter
-
-                if patience <= iter:
-                    done_looping = True
-                    break
-
-        end_time = time.clock()
-        print('Optimization complete.')
-        print('Best validation score of %f %% obtained at iteration %i, '
-              'with test performance %f %%' %
-              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
-        print >> sys.stderr, ('The code for file ' +
-                              os.path.split(__file__)[1] +
-                              ' ran for %.2fm' % ((end_time - start_time) / 60.))
+        return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+                                 batch_size=batch_size)
 
 
     def train(self, X, Y):
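Note: after this change _train_cnn is only a thin wrapper around train_cnn_example in theanoutil.py. A minimal caller sketch, not part of the commit; it assumes _train_cnn keeps X and Y as its first keyword parameters (as the return statement above suggests) and uses random data sized so that both the 80/20 train and test splits hold at least one full batch of 400:

    # Hypothetical usage after this commit
    import numpy as np
    from mmodel.theano.THEANO import ModelTHEANO

    model = ModelTHEANO(toolset='cnn')
    X = np.random.rand(2000, 304 * 304).astype('float32')   # 2000 rasterized 304x304 images
    Y = np.random.randint(0, 2, size=2000).astype('int32')  # binary labels
    model._train_cnn(X=X, Y=Y, n_epochs=1, batch_size=400)  # delegates to train_cnn_example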
mmodel/theano/theanoutil.py
@@ -4,12 +4,16 @@ import os, sys
 import time
 
 import numpy as np
+from sklearn import cross_validation
 
 import theano
 import theano.tensor as T
 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv
 
+import cPickle
+
+
 class LogisticRegression(object):
     """
     Multi-class Logistic Regression Class
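The new imports target Python 2 and the pre-0.18 scikit-learn API. sklearn.cross_validation and cPickle no longer exist on current stacks; if this module is ever ported, the equivalents would be roughly:

    # Not part of this commit: Python 3 / scikit-learn >= 0.20 equivalents
    import pickle                                          # cPickle was folded into pickle on Python 3
    from sklearn.model_selection import train_test_split   # replaces cross_validation.train_test_split

    # train_test_split keeps the call shape used below:
    # X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)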
@@ -164,8 +168,223 @@ class ConvPoolLayer(object):
         self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
         self.params = [self.W, self.b]
 
+def _shared_dataset(data_xy, borrow=True):
+    """ Function that loads the dataset into shared variables
 
+    The reason we store our dataset in shared variables is to allow
+    Theano to copy it into the GPU memory (when code is run on GPU).
+    Since copying data into the GPU is slow, copying a minibatch everytime
+    is needed (the default behaviour if the data is not in a shared
+    variable) would lead to a large decrease in performance.
+    """
+    data_x, data_y = data_xy
+    shared_x = theano.shared(np.asarray(data_x,
+                                        dtype=theano.config.floatX),
+                             borrow=borrow)
+    shared_y = theano.shared(np.asarray(data_y,
+                                        dtype=theano.config.floatX),
+                             borrow=borrow)
+    # When storing data on the GPU it has to be stored as floats
+    # therefore we will store the labels as ``floatX`` as well
+    # (``shared_y`` does exactly that). But during our computations
+    # we need them as ints (we use labels as index, and if they are
+    # floats it doesn't make sense) therefore instead of returning
+    # ``shared_y`` we will have to cast it to int. This little hack
+    # lets ous get around this issue
+    return shared_x, T.cast(shared_y, 'int32')
+
+def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
+                      learning_rate=0.1, n_epochs=200,
+                      nkerns=[20, 50, 50],
+                      batch_size=400):
+
+    if X == None:
+        assert dataset != None
+        with open(dataset, 'rb') as f:
+            train_set, test_set = cPickle.load(f)
+
+        X_train, Y_train = train_set
+        X_test, Y_test = test_set
+    else:
+        X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
+
+    X_train, Y_train = _shared_dataset((X_train, Y_train))
+    X_test, Y_test = _shared_dataset((X_test, Y_test))
+
+    # X_train = theano.shared(np.asarray(X_train, dtype=theano.config.floatX), borrow=True)
+    # Y_train = theano.shared(np.asarray(Y_train, dtype=theano.config.floatX), borrow=True)
+    # X_test = theano.shared(np.asarray(X_test, dtype=theano.config.floatX), borrow=True)
+    # Y_test = theano.shared(np.asarray(Y_test, dtype=theano.config.floatX), borrow=True)
+
+    n_train_batches = X_train.get_value(borrow=True).shape[0] / batch_size
+    n_test_batches = X_test.get_value(borrow=True).shape[0] / batch_size
+
+    print X_train.get_value(borrow=True).shape, Y_train.shape
+
+    rng = np.random.RandomState(12306)
+    index = T.lscalar()
+    x = T.matrix('x')
+    y = T.ivector('y')
+
+    ######################
+    # BUILD ACTUAL MODEL #
+    ######################
+    print '... building the model'
+
+    layer0_input = x.reshape((batch_size, 1, 304, 304))
+
+    # Construct the first convolutional pooling layer:
+    # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297)
+    # maxpooling reduces this further to (297/4, 297/4) = (74, 74)
+    # 4D output tensor is thus of shape (batch_size, nkerns[0], 74, 74)
+    layer0 = ConvPoolLayer(
+        rng,
+        input=layer0_input,
+        image_shape=(batch_size, 1, 304, 304),
+        filter_shape=(nkerns[0], 1, 8, 8),
+        poolsize=(4, 4)
+    )
+
+    # Construct the second convolutional pooling layer
+    # filtering reduces the image size to (74-8+1, 74-8+1) = (67, 67)
+    # maxpooling reduces this further to (67/4, 67/4) = (16, 16)
+    # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 16)
+    layer1 = ConvPoolLayer(
+        rng,
+        input=layer0.output,
+        image_shape=(batch_size, nkerns[0], 74, 74),
+        filter_shape=(nkerns[1], nkerns[0], 8, 8),
+        poolsize=(4, 4)
+    )
+
+    # Construct the third convolutional pooling layer
+    # filtering reduces the image size to (16-5+1, 16-5+1) = (12, 12)
+    # maxpooling reduces this further to (12/3, 12/3) = (4, 4)
+    # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4)
+    layer2 = ConvPoolLayer(
+        rng,
+        input=layer1.output,
+        image_shape=(batch_size, nkerns[1], 16, 16),
+        filter_shape=(nkerns[2], nkerns[1], 5, 5),
+        poolsize=(3, 3)
+    )
+
+    # the HiddenLayer being fully-connected, it operates on 2D matrices of
+    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
+    # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4),
+    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
+    layer3_input = layer2.output.flatten(2)
+    # construct a fully-connected sigmoidal layer
+    layer3 = HiddenLayer(
+        rng,
+        input=layer3_input,
+        n_in=nkerns[2] * 4 * 4,
+        n_out=500,
+        activation=T.tanh
+    )
+    # classify the values of the fully-connected sigmoidal layer
+    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=2)
+
+    # the cost we minimize during training is the NLL of the model
+    cost = layer4.negative_log_likelihood(y)
+    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
+    grads = T.grad(cost, params)
+    updates = [
+        (param_i, param_i - learning_rate * grad_i)
+        for param_i, grad_i in zip(params, grads)
+    ]
 
+    """
+    Total Parameters:
+    >>> 20 * 64 + 1000 * 64 + 2500 * 25 + 50 * 16 * 500 + 500 * 2
+    528780
+    """
+    train_model = theano.function(
+        [index],
+        cost,
+        updates=updates,
+        givens={
+            x: X_train[index * batch_size: (index + 1) * batch_size],
+            y: Y_train[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    test_model = theano.function(
+        [index],
+        layer4.errors(y),
+        givens={
+            x: X_test[index * batch_size: (index + 1) * batch_size],
+            y: Y_test[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    ###############
+    # TRAIN MODEL #
+    ###############
+    print '... training'
+    # early-stopping parameters
+    patience = 10000  # look as this many examples regardless
+    patience_increase = 2  # wait this much longer when a new best is found
+    improvement_threshold = 0.995  # a relative improvement of this much is
+                                   # considered significant
+    validation_frequency = min(n_train_batches, patience / 2)
+                                   # go through this many
+                                   # minibatche before checking the network
+                                   # on the validation set; in this case we
+                                   # check every epoch
+
+    best_validation_loss = np.inf
+    best_iter = 0
+    test_score = 0.
+    start_time = time.clock()
+
+    epoch = 0
+    done_looping = False
+
+    while (epoch < n_epochs) and (not done_looping):
+        epoch = epoch + 1
+        for minibatch_index in xrange(n_train_batches):
+
+            iter = (epoch - 1) * n_train_batches + minibatch_index
+
+            # if iter % 100 == 0:
+            # print 'training @ iter = ', iter
+            print 'training @ iter = ', iter
+            cost_ij = train_model(minibatch_index)
+
+            if (iter + 1) % validation_frequency == 0:
+
+                # compute zero-one loss on validation set
+                validation_losses = [test_model(i) for i in xrange(n_test_batches)]
+                this_validation_loss = np.mean(validation_losses)
+                print('epoch %i, minibatch %i/%i, validation error %f %%' %
+                      (epoch, minibatch_index + 1, n_train_batches,
+                       this_validation_loss * 100.))
+
+                # if we got the best validation score until now
+                if this_validation_loss < best_validation_loss:
+
+                    # improve patience if loss improvement is good enough
+                    if this_validation_loss < best_validation_loss * \
+                            improvement_threshold:
+                        patience = max(patience, iter * patience_increase)
+
+                    # save best validation score and iteration number
+                    best_validation_loss = this_validation_loss
+                    best_iter = iter
+
+            if patience <= iter:
+                done_looping = True
+                break
+
+    end_time = time.clock()
+    print('Optimization complete.')
+    print('Best validation score of %f %% obtained at iteration %i, '
+          'with test performance %f %%' %
+          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
+    print >> sys.stderr, ('The code for file ' +
+                          os.path.split(__file__)[1] +
+                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
 
 
 
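For reference, the feature-map sizes quoted in the layer comments (304 -> 74 -> 16 -> 4) and the figure in the "Total Parameters" docstring can be checked with a few lines of plain Python. This is a sanity check, not part of the commit; it assumes 'valid' convolution followed by non-overlapping max-pooling, as the ConvPoolLayer comments describe, and counts weights only (biases excluded):

    def conv_pool_size(size, filt, pool):
        # 'valid' convolution shrinks each side by (filt - 1); pooling then floor-divides
        return (size - filt + 1) // pool

    s0 = conv_pool_size(304, 8, 4)   # 74
    s1 = conv_pool_size(s0, 8, 4)    # 16
    s2 = conv_pool_size(s1, 5, 3)    # 4  -> layer3 n_in = nkerns[2] * 4 * 4 = 800

    # Weight count matching the docstring: three conv kernels + hidden layer + logistic layer
    n_weights = 20 * 1 * 8 * 8 + 50 * 20 * 8 * 8 + 50 * 50 * 5 * 5 + 800 * 500 + 500 * 2
    print(s0, s1, s2, n_weights)     # 74 16 4 528780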