Commit 3ef6ddf1034ad7bb6f5715622ca9c231af3f3084

Authored by Chunk
1 parent b2b2636c
Exists in master and in 1 other branch: refactor

staged.

mmodel/theano/THEANO.py
@@ -30,6 +30,7 @@ class ModelTHEANO(ModelBase):
 
 
     """
+
     def __init__(self, toolset='cnn', sc=None):
         ModelBase.__init__(self)
         self.toolset = toolset
@@ -66,188 +67,8 @@ class ModelTHEANO(ModelBase):
                     nkerns=[20, 50, 50],
                     batch_size=400):
 
-        if X == None:
-            assert dataset != None
-            with open(dataset, 'rb') as f:
-                train_set, test_set = cPickle.load(f)
-
-            X_train, Y_train = train_set
-            X_test, Y_test = test_set
-        else:
-            X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
-
-        X_train, Y_train = self._shared_dataset((X_train, Y_train))
-        X_test, Y_test = self._shared_dataset((X_test, Y_test))
-
-        # X_train = theano.shared(np.asarray(X_train, dtype=theano.config.floatX), borrow=True)
-        # Y_train = theano.shared(np.asarray(Y_train, dtype=theano.config.floatX), borrow=True)
-        # X_test = theano.shared(np.asarray(X_test, dtype=theano.config.floatX), borrow=True)
-        # Y_test = theano.shared(np.asarray(Y_test, dtype=theano.config.floatX), borrow=True)
-
-        n_train_batches = X_train.get_value(borrow=True).shape[0] / batch_size
-        n_test_batches = X_test.get_value(borrow=True).shape[0] / batch_size
-
-        print X_train.get_value(borrow=True).shape, Y_train.shape
-
-        rng = np.random.RandomState(12306)
-        index = T.lscalar()
-        x = T.matrix('x')
-        y = T.ivector('y')
-
-        ######################
-        # BUILD ACTUAL MODEL #
-        ######################
-        print '... building the model'
-
-        layer0_input = x.reshape((batch_size, 1, 304, 304))
-
-        # Construct the first convolutional pooling layer:
-        # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297)
-        # maxpooling reduces this further to (297/4, 297/4) = (74, 74)
-        # 4D output tensor is thus of shape (batch_size, nkerns[0], 74, 74)
-        layer0 = ConvPoolLayer(
-            rng,
-            input=layer0_input,
-            image_shape=(batch_size, 1, 304, 304),
-            filter_shape=(nkerns[0], 1, 8, 8),
-            poolsize=(4, 4)
-        )
-
-        # Construct the second convolutional pooling layer
-        # filtering reduces the image size to (74-8+1, 74-8+1) = (67, 67)
-        # maxpooling reduces this further to (67/4, 67/4) = (16, 16)
-        # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 16)
-        layer1 = ConvPoolLayer(
-            rng,
-            input=layer0.output,
-            image_shape=(batch_size, nkerns[0], 74, 74),
-            filter_shape=(nkerns[1], nkerns[0], 8, 8),
-            poolsize=(4, 4)
-        )
-
-        # Construct the third convolutional pooling layer
-        # filtering reduces the image size to (16-5+1, 16-5+1) = (12, 12)
-        # maxpooling reduces this further to (12/3, 12/3) = (4, 4)
-        # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4)
-        layer2 = ConvPoolLayer(
-            rng,
-            input=layer1.output,
-            image_shape=(batch_size, nkerns[1], 16, 16),
-            filter_shape=(nkerns[2], nkerns[1], 5, 5),
-            poolsize=(3, 3)
-        )
-
-        # the HiddenLayer being fully-connected, it operates on 2D matrices of
-        # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
-        # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4),
-        # or (500, 50 * 4 * 4) = (500, 800) with the default values.
-        layer3_input = layer2.output.flatten(2)
-        # construct a fully-connected sigmoidal layer
-        layer3 = HiddenLayer(
-            rng,
-            input=layer3_input,
-            n_in=nkerns[2] * 4 * 4,
-            n_out=500,
-            activation=T.tanh
-        )
-        # classify the values of the fully-connected sigmoidal layer
-        layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=2)
-
-        # the cost we minimize during training is the NLL of the model
-        cost = layer4.negative_log_likelihood(y)
-        params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
-        grads = T.grad(cost, params)
-        updates = [
-            (param_i, param_i - learning_rate * grad_i)
-            for param_i, grad_i in zip(params, grads)
-        ]
-
-        train_model = theano.function(
-            [index],
-            cost,
-            updates=updates,
-            givens={
-                x: X_train[index * batch_size: (index + 1) * batch_size],
-                y: Y_train[index * batch_size: (index + 1) * batch_size]
-            }
-        )
-
-        test_model = theano.function(
-            [index],
-            layer4.errors(y),
-            givens={
-                x: X_test[index * batch_size: (index + 1) * batch_size],
-                y: Y_test[index * batch_size: (index + 1) * batch_size]
-            }
-        )
-
-        ###############
-        # TRAIN MODEL #
-        ###############
-        print '... training'
-        # early-stopping parameters
-        patience = 10000  # look as this many examples regardless
-        patience_increase = 2  # wait this much longer when a new best is found
-        improvement_threshold = 0.995  # a relative improvement of this much is
-                                       # considered significant
-        validation_frequency = min(n_train_batches, patience / 2)
-                                       # go through this many
-                                       # minibatche before checking the network
-                                       # on the validation set; in this case we
-                                       # check every epoch
-
-        best_validation_loss = np.inf
-        best_iter = 0
-        test_score = 0.
-        start_time = time.clock()
-
-        epoch = 0
-        done_looping = False
-
-        while (epoch < n_epochs) and (not done_looping):
-            epoch = epoch + 1
-            for minibatch_index in xrange(n_train_batches):
-
-                iter = (epoch - 1) * n_train_batches + minibatch_index
-
-                # if iter % 100 == 0:
-                #     print 'training @ iter = ', iter
-                print 'training @ iter = ', iter
-                cost_ij = train_model(minibatch_index)
-
-                if (iter + 1) % validation_frequency == 0:
-
-                    # compute zero-one loss on validation set
-                    validation_losses = [test_model(i) for i in xrange(n_test_batches)]
-                    this_validation_loss = np.mean(validation_losses)
-                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
-                          (epoch, minibatch_index + 1, n_train_batches,
-                           this_validation_loss * 100.))
-
-                    # if we got the best validation score until now
-                    if this_validation_loss < best_validation_loss:
-
-                        # improve patience if loss improvement is good enough
-                        if this_validation_loss < best_validation_loss * \
-                           improvement_threshold:
-                            patience = max(patience, iter * patience_increase)
-
-                        # save best validation score and iteration number
-                        best_validation_loss = this_validation_loss
-                        best_iter = iter
-
-                if patience <= iter:
-                    done_looping = True
-                    break
-
-        end_time = time.clock()
-        print('Optimization complete.')
-        print('Best validation score of %f %% obtained at iteration %i, '
-              'with test performance %f %%' %
-              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
-        print >> sys.stderr, ('The code for file ' +
-                              os.path.split(__file__)[1] +
-                              ' ran for %.2fm' % ((end_time - start_time) / 60.))
+        return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+                                 batch_size=batch_size)
 
 
     def train(self, X, Y):
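
A quick sanity check of the shape arithmetic quoted in the comments of the removed block above (a minimal sketch, not part of the commit; conv_pool_size is a hypothetical helper, assuming 'valid' convolution followed by non-overlapping pooling that drops the incomplete border, which is what the 304 -> 74 -> 16 -> 4 figures imply):

# Minimal sketch: re-derive the per-layer feature-map sizes quoted in the
# ConvPoolLayer comments, assuming 'valid' convolution and pooling that
# discards the incomplete border (floor division).
def conv_pool_size(size, filter_size, pool_size):
    conved = size - filter_size + 1   # 'valid' convolution output size
    return conved // pool_size        # non-overlapping pooling

assert conv_pool_size(304, 8, 4) == 74   # layer0: 297 // 4
assert conv_pool_size(74, 8, 4) == 16    # layer1: 67 // 4
assert conv_pool_size(16, 5, 3) == 4     # layer2: 12 // 3
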
mmodel/theano/theanoutil.py
@@ -4,12 +4,16 @@ import os, sys
 import time
 
 import numpy as np
+from sklearn import cross_validation
 
 import theano
 import theano.tensor as T
 from theano.tensor.signal import downsample
 from theano.tensor.nnet import conv
 
+import cPickle
+
+
 class LogisticRegression(object):
     """
     Multi-class Logistic Regression Class
@@ -164,8 +168,223 @@ class ConvPoolLayer(object):
         self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
         self.params = [self.W, self.b]
 
+def _shared_dataset(data_xy, borrow=True):
+    """ Function that loads the dataset into shared variables
 
+    The reason we store our dataset in shared variables is to allow
+    Theano to copy it into the GPU memory (when code is run on GPU).
+    Since copying data into the GPU is slow, copying a minibatch everytime
+    is needed (the default behaviour if the data is not in a shared
+    variable) would lead to a large decrease in performance.
+    """
+    data_x, data_y = data_xy
+    shared_x = theano.shared(np.asarray(data_x,
+                                        dtype=theano.config.floatX),
+                             borrow=borrow)
+    shared_y = theano.shared(np.asarray(data_y,
+                                        dtype=theano.config.floatX),
+                             borrow=borrow)
+    # When storing data on the GPU it has to be stored as floats
+    # therefore we will store the labels as ``floatX`` as well
+    # (``shared_y`` does exactly that). But during our computations
+    # we need them as ints (we use labels as index, and if they are
+    # floats it doesn't make sense) therefore instead of returning
+    # ``shared_y`` we will have to cast it to int. This little hack
+    # lets ous get around this issue
+    return shared_x, T.cast(shared_y, 'int32')
+
+def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
+                      learning_rate=0.1, n_epochs=200,
+                      nkerns=[20, 50, 50],
+                      batch_size=400):
+
+    if X == None:
+        assert dataset != None
+        with open(dataset, 'rb') as f:
+            train_set, test_set = cPickle.load(f)
+
+        X_train, Y_train = train_set
+        X_test, Y_test = test_set
+    else:
+        X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
+
+    X_train, Y_train = _shared_dataset((X_train, Y_train))
+    X_test, Y_test = _shared_dataset((X_test, Y_test))
+
+    # X_train = theano.shared(np.asarray(X_train, dtype=theano.config.floatX), borrow=True)
+    # Y_train = theano.shared(np.asarray(Y_train, dtype=theano.config.floatX), borrow=True)
+    # X_test = theano.shared(np.asarray(X_test, dtype=theano.config.floatX), borrow=True)
+    # Y_test = theano.shared(np.asarray(Y_test, dtype=theano.config.floatX), borrow=True)
+
+    n_train_batches = X_train.get_value(borrow=True).shape[0] / batch_size
+    n_test_batches = X_test.get_value(borrow=True).shape[0] / batch_size
+
+    print X_train.get_value(borrow=True).shape, Y_train.shape
+
+    rng = np.random.RandomState(12306)
+    index = T.lscalar()
+    x = T.matrix('x')
+    y = T.ivector('y')
+
+    ######################
+    # BUILD ACTUAL MODEL #
+    ######################
+    print '... building the model'
+
+    layer0_input = x.reshape((batch_size, 1, 304, 304))
+
+    # Construct the first convolutional pooling layer:
+    # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297)
+    # maxpooling reduces this further to (297/4, 297/4) = (74, 74)
+    # 4D output tensor is thus of shape (batch_size, nkerns[0], 74, 74)
+    layer0 = ConvPoolLayer(
+        rng,
+        input=layer0_input,
+        image_shape=(batch_size, 1, 304, 304),
+        filter_shape=(nkerns[0], 1, 8, 8),
+        poolsize=(4, 4)
+    )
+
+    # Construct the second convolutional pooling layer
+    # filtering reduces the image size to (74-8+1, 74-8+1) = (67, 67)
+    # maxpooling reduces this further to (67/4, 67/4) = (16, 16)
+    # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 16)
+    layer1 = ConvPoolLayer(
+        rng,
+        input=layer0.output,
+        image_shape=(batch_size, nkerns[0], 74, 74),
+        filter_shape=(nkerns[1], nkerns[0], 8, 8),
+        poolsize=(4, 4)
+    )
+
+    # Construct the third convolutional pooling layer
+    # filtering reduces the image size to (16-5+1, 16-5+1) = (12, 12)
+    # maxpooling reduces this further to (12/3, 12/3) = (4, 4)
+    # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4)
+    layer2 = ConvPoolLayer(
+        rng,
+        input=layer1.output,
+        image_shape=(batch_size, nkerns[1], 16, 16),
+        filter_shape=(nkerns[2], nkerns[1], 5, 5),
+        poolsize=(3, 3)
+    )
+
+    # the HiddenLayer being fully-connected, it operates on 2D matrices of
+    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
+    # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4),
+    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
+    layer3_input = layer2.output.flatten(2)
+    # construct a fully-connected sigmoidal layer
+    layer3 = HiddenLayer(
+        rng,
+        input=layer3_input,
+        n_in=nkerns[2] * 4 * 4,
+        n_out=500,
+        activation=T.tanh
+    )
+    # classify the values of the fully-connected sigmoidal layer
+    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=2)
+
+    # the cost we minimize during training is the NLL of the model
+    cost = layer4.negative_log_likelihood(y)
+    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
+    grads = T.grad(cost, params)
+    updates = [
+        (param_i, param_i - learning_rate * grad_i)
+        for param_i, grad_i in zip(params, grads)
+    ]
 
+    """
+    Total Parameters:
+    >>> 20 * 64 + 1000 * 64 + 2500 * 25 + 50 * 16 * 500 + 500 * 2
+    528780
+    """
+    train_model = theano.function(
+        [index],
+        cost,
+        updates=updates,
+        givens={
+            x: X_train[index * batch_size: (index + 1) * batch_size],
+            y: Y_train[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    test_model = theano.function(
+        [index],
+        layer4.errors(y),
+        givens={
+            x: X_test[index * batch_size: (index + 1) * batch_size],
+            y: Y_test[index * batch_size: (index + 1) * batch_size]
+        }
+    )
+
+    ###############
+    # TRAIN MODEL #
+    ###############
+    print '... training'
+    # early-stopping parameters
+    patience = 10000  # look as this many examples regardless
+    patience_increase = 2  # wait this much longer when a new best is found
+    improvement_threshold = 0.995  # a relative improvement of this much is
+                                   # considered significant
+    validation_frequency = min(n_train_batches, patience / 2)
+                                   # go through this many
+                                   # minibatche before checking the network
+                                   # on the validation set; in this case we
+                                   # check every epoch
+
+    best_validation_loss = np.inf
+    best_iter = 0
+    test_score = 0.
+    start_time = time.clock()
+
+    epoch = 0
+    done_looping = False
+
+    while (epoch < n_epochs) and (not done_looping):
+        epoch = epoch + 1
+        for minibatch_index in xrange(n_train_batches):
+
+            iter = (epoch - 1) * n_train_batches + minibatch_index
+
+            # if iter % 100 == 0:
+            #     print 'training @ iter = ', iter
+            print 'training @ iter = ', iter
+            cost_ij = train_model(minibatch_index)
+
+            if (iter + 1) % validation_frequency == 0:
+
+                # compute zero-one loss on validation set
+                validation_losses = [test_model(i) for i in xrange(n_test_batches)]
+                this_validation_loss = np.mean(validation_losses)
+                print('epoch %i, minibatch %i/%i, validation error %f %%' %
+                      (epoch, minibatch_index + 1, n_train_batches,
+                       this_validation_loss * 100.))
+
+                # if we got the best validation score until now
+                if this_validation_loss < best_validation_loss:
+
+                    # improve patience if loss improvement is good enough
+                    if this_validation_loss < best_validation_loss * \
+                       improvement_threshold:
+                        patience = max(patience, iter * patience_increase)
+
+                    # save best validation score and iteration number
+                    best_validation_loss = this_validation_loss
+                    best_iter = iter
+
+            if patience <= iter:
+                done_looping = True
+                break
+
+    end_time = time.clock()
+    print('Optimization complete.')
+    print('Best validation score of %f %% obtained at iteration %i, '
+          'with test performance %f %%' %
+          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
+    print >> sys.stderr, ('The code for file ' +
+                          os.path.split(__file__)[1] +
+                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
 
 
 
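
The "Total Parameters" docstring added in train_cnn_example counts only the weight matrices of the five layers (biases excluded). A minimal sketch of that arithmetic, using the filter and layer shapes from the diff:

# Minimal sketch: weight counts per layer for nkerns = [20, 50, 50],
# matching the ">>> 20 * 64 + 1000 * 64 + 2500 * 25 + 50 * 16 * 500 + 500 * 2"
# docstring (bias parameters are not included in that figure).
nkerns = [20, 50, 50]
w0 = nkerns[0] * 1 * 8 * 8             # layer0 conv filters:  1280
w1 = nkerns[1] * nkerns[0] * 8 * 8     # layer1 conv filters:  64000
w2 = nkerns[2] * nkerns[1] * 5 * 5     # layer2 conv filters:  62500
w3 = (nkerns[2] * 4 * 4) * 500         # layer3 hidden layer:  400000
w4 = 500 * 2                           # layer4 logistic:      1000
assert w0 + w1 + w2 + w3 + w4 == 528780

With the defaults shown in the diff, train_cnn_example(X, Y) splits X and Y 80/20 via cross_validation.train_test_split; called without X it falls back to the pickled dataset at ../../res/ils_crop.pkl.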