Commit 3ef6ddf1034ad7bb6f5715622ca9c231af3f3084

Authored by Chunk
1 parent b2b2636c
Exists in master and in 1 other branch: refactor

staged.

mmodel/theano/THEANO.py
... ... @@ -30,6 +30,7 @@ class ModelTHEANO(ModelBase):
30 30  
31 31  
32 32 """
  33 +
33 34 def __init__(self, toolset='cnn', sc=None):
34 35 ModelBase.__init__(self)
35 36 self.toolset = toolset
... ... @@ -66,188 +67,8 @@ class ModelTHEANO(ModelBase):
66 67 nkerns=[20, 50, 50],
67 68 batch_size=400):
68 69  
69   - if X == None:
70   - assert dataset != None
71   - with open(dataset, 'rb') as f:
72   - train_set, test_set = cPickle.load(f)
73   -
74   - X_train, Y_train = train_set
75   - X_test, Y_test = test_set
76   - else:
77   - X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
78   -
79   - X_train, Y_train = self._shared_dataset((X_train, Y_train))
80   - X_test, Y_test = self._shared_dataset((X_test, Y_test))
81   -
82   - # X_train = theano.shared(np.asarray(X_train, dtype=theano.config.floatX), borrow=True)
83   - # Y_train = theano.shared(np.asarray(Y_train, dtype=theano.config.floatX), borrow=True)
84   - # X_test = theano.shared(np.asarray(X_test, dtype=theano.config.floatX), borrow=True)
85   - # Y_test = theano.shared(np.asarray(Y_test, dtype=theano.config.floatX), borrow=True)
86   -
87   - n_train_batches = X_train.get_value(borrow=True).shape[0] / batch_size
88   - n_test_batches = X_test.get_value(borrow=True).shape[0] / batch_size
89   -
90   - print X_train.get_value(borrow=True).shape, Y_train.shape
91   -
92   - rng = np.random.RandomState(12306)
93   - index = T.lscalar()
94   - x = T.matrix('x')
95   - y = T.ivector('y')
96   -
97   - ######################
98   - # BUILD ACTUAL MODEL #
99   - ######################
100   - print '... building the model'
101   -
102   - layer0_input = x.reshape((batch_size, 1, 304, 304))
103   -
104   - # Construct the first convolutional pooling layer:
105   - # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297)
106   - # maxpooling reduces this further to (297/4, 297/4) = (74, 74)
107   - # 4D output tensor is thus of shape (batch_size, nkerns[0], 74, 74)
108   - layer0 = ConvPoolLayer(
109   - rng,
110   - input=layer0_input,
111   - image_shape=(batch_size, 1, 304, 304),
112   - filter_shape=(nkerns[0], 1, 8, 8),
113   - poolsize=(4, 4)
114   - )
115   -
116   - # Construct the second convolutional pooling layer
117   - # filtering reduces the image size to (74-8+1, 74-8+1) = (67, 67)
118   - # maxpooling reduces this further to (67/4, 67/4) = (16, 16)
119   - # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 16)
120   - layer1 = ConvPoolLayer(
121   - rng,
122   - input=layer0.output,
123   - image_shape=(batch_size, nkerns[0], 74, 74),
124   - filter_shape=(nkerns[1], nkerns[0], 8, 8),
125   - poolsize=(4, 4)
126   - )
127   -
128   - # Construct the third convolutional pooling layer
129   - # filtering reduces the image size to (16-5+1, 16-5+1) = (12, 12)
130   - # maxpooling reduces this further to (12/3, 12/3) = (4, 4)
131   - # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4)
132   - layer2 = ConvPoolLayer(
133   - rng,
134   - input=layer1.output,
135   - image_shape=(batch_size, nkerns[1], 16, 16),
136   - filter_shape=(nkerns[2], nkerns[1], 5, 5),
137   - poolsize=(3, 3)
138   - )
139   -
140   - # the HiddenLayer being fully-connected, it operates on 2D matrices of
141   - # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
142   - # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4),
143   - # or (500, 50 * 4 * 4) = (500, 800) with the default values.
144   - layer3_input = layer2.output.flatten(2)
145   - # construct a fully-connected sigmoidal layer
146   - layer3 = HiddenLayer(
147   - rng,
148   - input=layer3_input,
149   - n_in=nkerns[2] * 4 * 4,
150   - n_out=500,
151   - activation=T.tanh
152   - )
153   - # classify the values of the fully-connected sigmoidal layer
154   - layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=2)
155   -
156   - # the cost we minimize during training is the NLL of the model
157   - cost = layer4.negative_log_likelihood(y)
158   - params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
159   - grads = T.grad(cost, params)
160   - updates = [
161   - (param_i, param_i - learning_rate * grad_i)
162   - for param_i, grad_i in zip(params, grads)
163   - ]
164   -
165   - train_model = theano.function(
166   - [index],
167   - cost,
168   - updates=updates,
169   - givens={
170   - x: X_train[index * batch_size: (index + 1) * batch_size],
171   - y: Y_train[index * batch_size: (index + 1) * batch_size]
172   - }
173   - )
174   -
175   - test_model = theano.function(
176   - [index],
177   - layer4.errors(y),
178   - givens={
179   - x: X_test[index * batch_size: (index + 1) * batch_size],
180   - y: Y_test[index * batch_size: (index + 1) * batch_size]
181   - }
182   - )
183   -
184   - ###############
185   - # TRAIN MODEL #
186   - ###############
187   - print '... training'
188   - # early-stopping parameters
189   - patience = 10000 # look as this many examples regardless
190   - patience_increase = 2 # wait this much longer when a new best is found
191   - improvement_threshold = 0.995 # a relative improvement of this much is
192   - # considered significant
193   - validation_frequency = min(n_train_batches, patience / 2)
194   - # go through this many
195   - # minibatche before checking the network
196   - # on the validation set; in this case we
197   - # check every epoch
198   -
199   - best_validation_loss = np.inf
200   - best_iter = 0
201   - test_score = 0.
202   - start_time = time.clock()
203   -
204   - epoch = 0
205   - done_looping = False
206   -
207   - while (epoch < n_epochs) and (not done_looping):
208   - epoch = epoch + 1
209   - for minibatch_index in xrange(n_train_batches):
210   -
211   - iter = (epoch - 1) * n_train_batches + minibatch_index
212   -
213   - # if iter % 100 == 0:
214   - # print 'training @ iter = ', iter
215   - print 'training @ iter = ', iter
216   - cost_ij = train_model(minibatch_index)
217   -
218   - if (iter + 1) % validation_frequency == 0:
219   -
220   - # compute zero-one loss on validation set
221   - validation_losses = [test_model(i) for i in xrange(n_test_batches)]
222   - this_validation_loss = np.mean(validation_losses)
223   - print('epoch %i, minibatch %i/%i, validation error %f %%' %
224   - (epoch, minibatch_index + 1, n_train_batches,
225   - this_validation_loss * 100.))
226   -
227   - # if we got the best validation score until now
228   - if this_validation_loss < best_validation_loss:
229   -
230   - # improve patience if loss improvement is good enough
231   - if this_validation_loss < best_validation_loss * \
232   - improvement_threshold:
233   - patience = max(patience, iter * patience_increase)
234   -
235   - # save best validation score and iteration number
236   - best_validation_loss = this_validation_loss
237   - best_iter = iter
238   -
239   - if patience <= iter:
240   - done_looping = True
241   - break
242   -
243   - end_time = time.clock()
244   - print('Optimization complete.')
245   - print('Best validation score of %f %% obtained at iteration %i, '
246   - 'with test performance %f %%' %
247   - (best_validation_loss * 100., best_iter + 1, test_score * 100.))
248   - print >> sys.stderr, ('The code for file ' +
249   - os.path.split(__file__)[1] +
250   - ' ran for %.2fm' % ((end_time - start_time) / 60.))
  70 + return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
  71 + batch_size=batch_size)
251 72  
252 73  
253 74 def train(self, X, Y):
... ...
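The net effect of this hunk is that the inlined CNN training loop is removed from ModelTHEANO and the method now simply forwards its arguments to train_cnn_example in mmodel/theano/theanoutil.py (the method's own name is elided by the hunk header). A minimal sketch of calling the new helper directly with in-memory arrays, which exercises the cross_validation.train_test_split branch shown below; the sample count, image size and labels are assumptions chosen to match the reshape to (batch_size, 1, 304, 304) and n_out=2 in the moved code, and the import path assumes mmodel/theano is an importable package:

    # Hypothetical driver for the refactored helper (Python 2 / Theano, as in the diff).
    import numpy as np
    from mmodel.theano.theanoutil import train_cnn_example

    n_samples = 4000                                             # assumption: enough for full batches
    X = np.random.rand(n_samples, 304 * 304).astype('float32')   # rasterized 304x304 images
    Y = np.random.randint(0, 2, size=n_samples)                  # binary labels

    # An 80/20 split of 4000 samples gives 3200/400 = 8 training batches
    # and 800/400 = 2 test batches with the default batch_size of 400.
    train_cnn_example(X, Y,
                      learning_rate=0.1,
                      n_epochs=1,          # 1 epoch only, to keep the sketch cheap
                      nkerns=[20, 50, 50],
                      batch_size=400)

Note that n_train_batches and n_test_batches are computed with Python 2 integer division, so any trailing partial batch is silently dropped.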
mmodel/theano/theanoutil.py
... ... @@ -4,12 +4,16 @@ import os, sys
4 4 import time
5 5  
6 6 import numpy as np
  7 +from sklearn import cross_validation
7 8  
8 9 import theano
9 10 import theano.tensor as T
10 11 from theano.tensor.signal import downsample
11 12 from theano.tensor.nnet import conv
12 13  
  14 +import cPickle
  15 +
  16 +
13 17 class LogisticRegression(object):
14 18 """
15 19 Multi-class Logistic Regression Class
... ... @@ -164,8 +168,223 @@ class ConvPoolLayer(object):
164 168 self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
165 169 self.params = [self.W, self.b]
166 170  
  171 +def _shared_dataset(data_xy, borrow=True):
  172 + """ Function that loads the dataset into shared variables
167 173  
  174 + The reason we store our dataset in shared variables is to allow
  175 + Theano to copy it into the GPU memory (when code is run on GPU).
  176 + Since copying data into the GPU is slow, copying a minibatch every time
  177 + it is needed (the default behaviour if the data is not in a shared
  178 + variable) would lead to a large decrease in performance.
  179 + """
  180 + data_x, data_y = data_xy
  181 + shared_x = theano.shared(np.asarray(data_x,
  182 + dtype=theano.config.floatX),
  183 + borrow=borrow)
  184 + shared_y = theano.shared(np.asarray(data_y,
  185 + dtype=theano.config.floatX),
  186 + borrow=borrow)
  187 + # When storing data on the GPU it has to be stored as floats
  188 + # therefore we will store the labels as ``floatX`` as well
  189 + # (``shared_y`` does exactly that). But during our computations
  190 + # we need them as ints (we use labels as index, and if they are
  191 + # floats it doesn't make sense) therefore instead of returning
  192 + # ``shared_y`` we will have to cast it to int. This little hack
  193 + lets us get around this issue
  194 + return shared_x, T.cast(shared_y, 'int32')
  195 +
  196 +def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'ils_crop.pkl'),
  197 + learning_rate=0.1, n_epochs=200,
  198 + nkerns=[20, 50, 50],
  199 + batch_size=400):
  200 +
  201 + if X is None:
  202 + assert dataset is not None
  203 + with open(dataset, 'rb') as f:
  204 + train_set, test_set = cPickle.load(f)
  205 +
  206 + X_train, Y_train = train_set
  207 + X_test, Y_test = test_set
  208 + else:
  209 + X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
  210 +
  211 + X_train, Y_train = _shared_dataset((X_train, Y_train))
  212 + X_test, Y_test = _shared_dataset((X_test, Y_test))
  213 +
  214 + # X_train = theano.shared(np.asarray(X_train, dtype=theano.config.floatX), borrow=True)
  215 + # Y_train = theano.shared(np.asarray(Y_train, dtype=theano.config.floatX), borrow=True)
  216 + # X_test = theano.shared(np.asarray(X_test, dtype=theano.config.floatX), borrow=True)
  217 + # Y_test = theano.shared(np.asarray(Y_test, dtype=theano.config.floatX), borrow=True)
  218 +
  219 + n_train_batches = X_train.get_value(borrow=True).shape[0] / batch_size
  220 + n_test_batches = X_test.get_value(borrow=True).shape[0] / batch_size
  221 +
  222 + print X_train.get_value(borrow=True).shape, Y_train.shape
  223 +
  224 + rng = np.random.RandomState(12306)
  225 + index = T.lscalar()
  226 + x = T.matrix('x')
  227 + y = T.ivector('y')
  228 +
  229 + ######################
  230 + # BUILD ACTUAL MODEL #
  231 + ######################
  232 + print '... building the model'
  233 +
  234 + layer0_input = x.reshape((batch_size, 1, 304, 304))
  235 +
  236 + # Construct the first convolutional pooling layer:
  237 + # filtering reduces the image size to (304-8+1 , 304-8+1) = (297, 297)
  238 + # maxpooling reduces this further to (297/4, 297/4) = (74, 74)
  239 + # 4D output tensor is thus of shape (batch_size, nkerns[0], 74, 74)
  240 + layer0 = ConvPoolLayer(
  241 + rng,
  242 + input=layer0_input,
  243 + image_shape=(batch_size, 1, 304, 304),
  244 + filter_shape=(nkerns[0], 1, 8, 8),
  245 + poolsize=(4, 4)
  246 + )
  247 +
  248 + # Construct the second convolutional pooling layer
  249 + # filtering reduces the image size to (74-8+1, 74-8+1) = (67, 67)
  250 + # maxpooling reduces this further to (67/4, 67/4) = (16, 16)
  251 + # 4D output tensor is thus of shape (batch_size, nkerns[1], 16, 16)
  252 + layer1 = ConvPoolLayer(
  253 + rng,
  254 + input=layer0.output,
  255 + image_shape=(batch_size, nkerns[0], 74, 74),
  256 + filter_shape=(nkerns[1], nkerns[0], 8, 8),
  257 + poolsize=(4, 4)
  258 + )
  259 +
  260 + # Construct the third convolutional pooling layer
  261 + # filtering reduces the image size to (16-5+1, 16-5+1) = (12, 12)
  262 + # maxpooling reduces this further to (12/3, 12/3) = (4, 4)
  263 + # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4)
  264 + layer2 = ConvPoolLayer(
  265 + rng,
  266 + input=layer1.output,
  267 + image_shape=(batch_size, nkerns[1], 16, 16),
  268 + filter_shape=(nkerns[2], nkerns[1], 5, 5),
  269 + poolsize=(3, 3)
  270 + )
  271 +
  272 + # the HiddenLayer being fully-connected, it operates on 2D matrices of
  273 + # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
  274 + # This will generate a matrix of shape (batch_size, nkerns[2] * 4 * 4),
  275 + # or (400, 50 * 4 * 4) = (400, 800) with the default values.
  276 + layer3_input = layer2.output.flatten(2)
  277 + # construct a fully-connected sigmoidal layer
  278 + layer3 = HiddenLayer(
  279 + rng,
  280 + input=layer3_input,
  281 + n_in=nkerns[2] * 4 * 4,
  282 + n_out=500,
  283 + activation=T.tanh
  284 + )
  285 + # classify the values of the fully-connected sigmoidal layer
  286 + layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=2)
  287 +
  288 + # the cost we minimize during training is the NLL of the model
  289 + cost = layer4.negative_log_likelihood(y)
  290 + params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
  291 + grads = T.grad(cost, params)
  292 + updates = [
  293 + (param_i, param_i - learning_rate * grad_i)
  294 + for param_i, grad_i in zip(params, grads)
  295 + ]
168 296  
  297 + """
  298 + Total Parameters:
  299 + >>> 20 * 64 + 1000 * 64 + 2500 * 25 + 50 * 16 * 500 + 500 * 2
  300 + 528780
  301 + """
  302 + train_model = theano.function(
  303 + [index],
  304 + cost,
  305 + updates=updates,
  306 + givens={
  307 + x: X_train[index * batch_size: (index + 1) * batch_size],
  308 + y: Y_train[index * batch_size: (index + 1) * batch_size]
  309 + }
  310 + )
  311 +
  312 + test_model = theano.function(
  313 + [index],
  314 + layer4.errors(y),
  315 + givens={
  316 + x: X_test[index * batch_size: (index + 1) * batch_size],
  317 + y: Y_test[index * batch_size: (index + 1) * batch_size]
  318 + }
  319 + )
  320 +
  321 + ###############
  322 + # TRAIN MODEL #
  323 + ###############
  324 + print '... training'
  325 + # early-stopping parameters
  326 + patience = 10000 # look at this many examples regardless
  327 + patience_increase = 2 # wait this much longer when a new best is found
  328 + improvement_threshold = 0.995 # a relative improvement of this much is
  329 + # considered significant
  330 + validation_frequency = min(n_train_batches, patience / 2)
  331 + # go through this many
  332 + # minibatches before checking the network
  333 + # on the validation set; in this case we
  334 + # check every epoch
  335 +
  336 + best_validation_loss = np.inf
  337 + best_iter = 0
  338 + test_score = 0.
  339 + start_time = time.clock()
  340 +
  341 + epoch = 0
  342 + done_looping = False
  343 +
  344 + while (epoch < n_epochs) and (not done_looping):
  345 + epoch = epoch + 1
  346 + for minibatch_index in xrange(n_train_batches):
  347 +
  348 + iter = (epoch - 1) * n_train_batches + minibatch_index
  349 +
  350 + # if iter % 100 == 0:
  351 + # print 'training @ iter = ', iter
  352 + print 'training @ iter = ', iter
  353 + cost_ij = train_model(minibatch_index)
  354 +
  355 + if (iter + 1) % validation_frequency == 0:
  356 +
  357 + # compute zero-one loss on validation set
  358 + validation_losses = [test_model(i) for i in xrange(n_test_batches)]
  359 + this_validation_loss = np.mean(validation_losses)
  360 + print('epoch %i, minibatch %i/%i, validation error %f %%' %
  361 + (epoch, minibatch_index + 1, n_train_batches,
  362 + this_validation_loss * 100.))
  363 +
  364 + # if we got the best validation score until now
  365 + if this_validation_loss < best_validation_loss:
  366 +
  367 + # improve patience if loss improvement is good enough
  368 + if this_validation_loss < best_validation_loss * \
  369 + improvement_threshold:
  370 + patience = max(patience, iter * patience_increase)
  371 +
  372 + # save best validation score and iteration number
  373 + best_validation_loss = this_validation_loss
  374 + best_iter = iter
  375 +
  376 + if patience <= iter:
  377 + done_looping = True
  378 + break
  379 +
  380 + end_time = time.clock()
  381 + print('Optimization complete.')
  382 + print('Best validation score of %f %% obtained at iteration %i, '
  383 + 'with test performance %f %%' %
  384 + (best_validation_loss * 100., best_iter + 1, test_score * 100.))
  385 + print >> sys.stderr, ('The code for file ' +
  386 + os.path.split(__file__)[1] +
  387 + ' ran for %.2fm' % ((end_time - start_time) / 60.))
169 388  
170 389  
171 390  
... ...
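The new module-level _shared_dataset follows the usual Theano pattern explained in its docstring: both the features and the labels are stored in floatX shared variables so Theano can keep the whole dataset on the GPU, and the labels are then cast back to int32 so they can be used as indices. A small sketch of what the two return values look like, with toy arrays standing in for real data (and the same import-path assumption as above):

    # Minimal sketch of _shared_dataset's contract (the toy arrays are assumptions).
    import numpy as np
    from mmodel.theano.theanoutil import _shared_dataset

    data_x = np.random.rand(8, 4)                    # hypothetical feature matrix
    data_y = np.array([0, 1, 1, 0, 1, 0, 0, 1])      # hypothetical integer labels

    shared_x, y_cast = _shared_dataset((data_x, data_y))

    print shared_x.get_value(borrow=True).dtype      # floatX (float32 or float64)
    print y_cast.dtype                                # 'int32'

Because y_cast is a symbolic cast of the underlying floatX shared variable rather than a shared variable itself, slicing it inside the givens dictionaries of train_model and test_model still works, which is exactly how Y_train and Y_test are used in the hunk above.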
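The comments on the three ConvPoolLayer blocks quote the feature-map sizes 304 -> 74 -> 16 -> 4. A quick check of that arithmetic, assuming a valid convolution followed by max pooling that discards the border remainder (which is what the quoted sizes imply):

    # Feature-map size after a valid convolution and non-overlapping max pooling.
    def conv_pool_size(size, filter_size, pool_size):
        return (size - filter_size + 1) // pool_size

    s0 = conv_pool_size(304, 8, 4)    # (304 - 8 + 1) // 4 = 74
    s1 = conv_pool_size(s0, 8, 4)     # (74 - 8 + 1) // 4 = 16
    s2 = conv_pool_size(s1, 5, 3)     # (16 - 5 + 1) // 3 = 4
    print s0, s1, s2                  # 74 16 4

The flattened input to HiddenLayer is therefore nkerns[2] * 4 * 4 = 800 values per example, matching n_in=nkerns[2] * 4 * 4, and the weight count quoted in the "Total Parameters" docstring (528780) follows from these same shapes.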