mnist re-testing...

Chunk
1 parent 3ef6ddf1
Showing 3 changed files with 221 additions and 31 deletions Show diff stats
mmodel/theano/THEANO.py
mmodel/theano/theanoutil.py
test/test_model.py
@@ -37,38 +37,223 @@ class ModelTHEANO(ModelBase):
         self.sparker = sc
         self.model = None
  
-    def _shared_dataset(self, data_xy, borrow=True):
-        """ Function that loads the dataset into shared variables
-
-        The reason we store our dataset in shared variables is to allow
-        Theano to copy it into the GPU memory (when code is run on GPU).
-        Since copying data into the GPU is slow, copying a minibatch everytime
-        is needed (the default behaviour if the data is not in a shared
-        variable) would lead to a large decrease in performance.
-        """
-        data_x, data_y = data_xy
-        shared_x = theano.shared(np.asarray(data_x,
-                                            dtype=theano.config.floatX),
-                                 borrow=borrow)
-        shared_y = theano.shared(np.asarray(data_y,
-                                            dtype=theano.config.floatX),
-                                 borrow=borrow)
-        # When storing data on the GPU it has to be stored as floats
-        # therefore we will store the labels as ``floatX`` as well
-        # (``shared_y`` does exactly that). But during our computations
-        # we need them as ints (we use labels as index, and if they are
-        # floats it doesn't make sense) therefore instead of returning
-        # ``shared_y`` we will have to cast it to int. This little hack
-        # lets ous get around this issue
-        return shared_x, T.cast(shared_y, 'int32')
-
-    def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'ils_crop.pkl'),
+    def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'mnist.pkl.gz'),
                    learning_rate=0.1, n_epochs=200,
                    nkerns=[20, 50, 50],
                    batch_size=400):
  
-        return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
-                                 batch_size=batch_size)
+        # return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
+        #                          batch_size=batch_size)
+
+        with gzip.open(dataset, 'rb') as f:
+            train_set, valid_set, test_set = cPickle.load(f)
+
+        train_set_x, train_set_y = shared_dataset(train_set)
+        valid_set_x, valid_set_y = shared_dataset(valid_set)
+        test_set_x, test_set_y = shared_dataset(test_set)
+
+        # compute number of minibatches for training, validation and testing
+        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
+        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
+        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
+        n_train_batches /= batch_size
+        n_valid_batches /= batch_size
+        n_test_batches /= batch_size
+
+        print train_set_x.get_value(borrow=True).shape, train_set_y.get_value(borrow=True).shape
+
+        rng = np.random.RandomState(12306)
+        index = T.lscalar()  # index to a [mini]batch
+        # start-snippet-1
+        x = T.matrix('x')   # the data is presented as rasterized images
+        y = T.ivector('y')  # the labels are presented as 1D vector of
+                            # [int] labels
+
+        ######################
+        # BUILD ACTUAL MODEL #
+        ######################
+        print '... building the model'
+
+        layer0_input = x.reshape((batch_size, 1, 28, 28))
+
+        # Construct the first convolutional pooling layer:
+        # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
+        # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
+        # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
+        layer0 = ConvPoolLayer(
+            rng,
+            input=layer0_input,
+            image_shape=(batch_size, 1, 28, 28),
+            filter_shape=(nkerns[0], 1, 5, 5),
+            poolsize=(2, 2)
+        )
+
+        # Construct the second convolutional pooling layer
+        # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
+        # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
+        # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
+        layer1 = ConvPoolLayer(
+            rng,
+            input=layer0.output,
+            image_shape=(batch_size, nkerns[0], 12, 12),
+            filter_shape=(nkerns[1], nkerns[0], 5, 5),
+            poolsize=(2, 2)
+        )
+
+        # the HiddenLayer being fully-connected, it operates on 2D matrices of
+        # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
+        # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
+        # or (400, 50 * 4 * 4) = (400, 800) with the default values.
+        layer2_input = layer1.output.flatten(2)
+
+        # construct a fully-connected sigmoidal layer
+        layer2 = HiddenLayer(
+            rng,
+            input=layer2_input,
+            n_in=nkerns[1] * 4 * 4,
+            n_out=500,
+            activation=T.tanh
+        )
+
+        # classify the values of the fully-connected sigmoidal layer
+        layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
+
+        # the cost we minimize during training is the NLL of the model
+        cost = layer3.negative_log_likelihood(y)
+
+        # create a function to compute the mistakes that are made by the model
+        test_model = theano.function(
+            [index],
+            layer3.errors(y),
+            givens={
+                x: test_set_x[index * batch_size: (index + 1) * batch_size],
+                y: test_set_y[index * batch_size: (index + 1) * batch_size]
+            }
+        )
+
+        validate_model = theano.function(
+            [index],
+            layer3.errors(y),
+            givens={
+                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
+                y: valid_set_y[index * batch_size: (index + 1) * batch_size]
+            }
+        )
+
+        # create a list of all model parameters to be fit by gradient descent
+        params = layer3.params + layer2.params + layer1.params + layer0.params
+
+        # create a list of gradients for all model parameters
+        grads = T.grad(cost, params)
+
+        # train_model is a function that updates the model parameters by
+        # SGD. Since this model has many parameters, it would be tedious to
+        # manually create an update rule for each model parameter. We thus
+        # create the updates list by automatically looping over all
+        # (params[i], grads[i]) pairs.
+        updates = [
+            (param_i, param_i - learning_rate * grad_i)
+            for param_i, grad_i in zip(params, grads)
+        ]
+
+        train_model = theano.function(
+            [index],
+            cost,
+            updates=updates,
+            givens={
+                x: train_set_x[index * batch_size: (index + 1) * batch_size],
+                y: train_set_y[index * batch_size: (index + 1) * batch_size]
+            }
+        )
+        # end-snippet-1
+
+        ###############
+        # TRAIN MODEL #
+        ###############
+        print '... training'
+        # early-stopping parameters
+        patience = 10000  # look at this many examples regardless
+        patience_increase = 2  # wait this much longer when a new best is
+                               # found
+        improvement_threshold = 0.995  # a relative improvement of this much is
+                                       # considered significant
+        validation_frequency = min(n_train_batches, patience / 2)
+                                      # go through this many
+                                      # minibatches before checking the network
+                                      # on the validation set; in this case we
+                                      # check every epoch
+
+        best_validation_loss = np.inf
+        best_iter = 0
+        test_score = 0.
+        start_time = time.clock()
+
+        epoch = 0
+        done_looping = False
+
+        while (epoch < n_epochs) and (not done_looping):
+            epoch = epoch + 1
+            for minibatch_index in xrange(n_train_batches):
+
+                iter = (epoch - 1) * n_train_batches + minibatch_index
+
+                if iter % 100 == 0:
+                    print 'training @ iter = ', iter
+                cost_ij = train_model(minibatch_index)
+
+                if (iter + 1) % validation_frequency == 0:
+
+                    # compute zero-one loss on validation set
+                    validation_losses = [validate_model(i) for i
+                                         in xrange(n_valid_batches)]
+                    this_validation_loss = np.mean(validation_losses)
+                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
+                          (epoch, minibatch_index + 1, n_train_batches,
+                           this_validation_loss * 100.))
+
+                    # if we got the best validation score until now
+                    if this_validation_loss < best_validation_loss:
+
+                        #improve patience if loss improvement is good enough
+                        if this_validation_loss < best_validation_loss *  \
+                           improvement_threshold:
+                            patience = max(patience, iter * patience_increase)
+
+                        # save best validation score and iteration number
+                        best_validation_loss = this_validation_loss
+                        best_iter = iter
+
+                        # test it on the test set
+                        test_losses = [
+                            test_model(i)
+                            for i in xrange(n_test_batches)
+                        ]
+                        test_score = np.mean(test_losses)
+                        print(('     epoch %i, minibatch %i/%i, test error of '
+                               'best model %f %%') %
+                              (epoch, minibatch_index + 1, n_train_batches,
+                               test_score * 100.))
+
+                if patience <= iter:
+                    done_looping = True
+                    break
+
+        end_time = time.clock()
+        print('Optimization complete.')
+        print('Best validation score of %f %% obtained at iteration %i, '
+              'with test performance %f %%' %
+              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
+        print >> sys.stderr, ('The code for file ' +
+                              os.path.split(__file__)[1] +
+                              ' ran for %.2fm' % ((end_time - start_time) / 60.))
+
+
+
+
+
+
+
+
  
  
     def train(self, X, Y):
@@ -168,7 +168,7 @@ class ConvPoolLayer(object):
         self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
         self.params = [self.W, self.b]
  
-def _shared_dataset(data_xy, borrow=True):
+def shared_dataset(data_xy, borrow=True):
     """ Function that loads the dataset into shared variables
  
     The reason we store our dataset in shared variables is to allow
@@ -208,8 +208,8 @@ def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'il
     else:
         X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
  
-    X_train, Y_train = _shared_dataset((X_train, Y_train))
-    X_test, Y_test = _shared_dataset((X_test, Y_test))
+    X_train, Y_train = shared_dataset((X_train, Y_train))
+    X_test, Y_test = shared_dataset((X_test, Y_test))
  
     # X_train = theano.shared(np.asarray(X_train, dtype=theano.config.floatX), borrow=True)
     # Y_train = theano.shared(np.asarray(Y_train, dtype=theano.config.floatX), borrow=True)
@@ -149,6 +149,11 @@ def test_SVM_ILSVRC_S():
     # test_SVM_ILSVRC_SPARK()
  
  
+def test_THEANO_mnist():
+    mtheano = THEANO.ModelTHEANO(toolset='cnn')
+    mtheano._train_cnn(learning_rate=0.1, n_epochs=200, dataset=os.path.join(package_dir, '../res/', 'mnist.pkl.gz'), nkerns=[20, 50], batch_size=500)
+
+
 def test_THEANO_crop():
     timer.mark()
     dilc = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_crop_pil')
...	...	@@ -37,38 +37,223 @@ class ModelTHEANO(ModelBase):
37	37	self.sparker = sc
38	38	self.model = None
39	39
40		- def _shared_dataset(self, data_xy, borrow=True):
41		- """ Function that loads the dataset into shared variables
42		-
43		- The reason we store our dataset in shared variables is to allow
44		- Theano to copy it into the GPU memory (when code is run on GPU).
45		- Since copying data into the GPU is slow, copying a minibatch everytime
46		- is needed (the default behaviour if the data is not in a shared
47		- variable) would lead to a large decrease in performance.
48		- """
49		- data_x, data_y = data_xy
50		- shared_x = theano.shared(np.asarray(data_x,
51		- dtype=theano.config.floatX),
52		- borrow=borrow)
53		- shared_y = theano.shared(np.asarray(data_y,
54		- dtype=theano.config.floatX),
55		- borrow=borrow)
56		- # When storing data on the GPU it has to be stored as floats
57		- # therefore we will store the labels as ``floatX`` as well
58		- # (``shared_y`` does exactly that). But during our computations
59		- # we need them as ints (we use labels as index, and if they are
60		- # floats it doesn't make sense) therefore instead of returning
61		- # ``shared_y`` we will have to cast it to int. This little hack
62		- # lets ous get around this issue
63		- return shared_x, T.cast(shared_y, 'int32')
64		-
65		- def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'ils_crop.pkl'),
	40	+ def _train_cnn(self, X=None, Y=None, dataset=os.path.join(package_dir, '../../res/', 'mnist.pkl.gz'),
66	41	learning_rate=0.1, n_epochs=200,
67	42	nkerns=[20, 50, 50],
68	43	batch_size=400):
69	44
70		- return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
71		- batch_size=batch_size)
	45	+ # return train_cnn_example(X, Y, dataset=dataset, learning_rate=learning_rate, n_epochs=n_epochs, nkerns=nkerns,
	46	+ # batch_size=batch_size)
	47	+
	48	+ with gzip.open(dataset, 'rb') as f:
	49	+ train_set, valid_set, test_set = cPickle.load(f)
	50	+
	51	+ train_set_x, train_set_y = shared_dataset(train_set)
	52	+ valid_set_x, valid_set_y = shared_dataset(valid_set)
	53	+ test_set_x, test_set_y = shared_dataset(test_set)
	54	+
	55	+ # compute number of minibatches for training, validation and testing
	56	+ n_train_batches = train_set_x.get_value(borrow=True).shape[0]
	57	+ n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
	58	+ n_test_batches = test_set_x.get_value(borrow=True).shape[0]
	59	+ n_train_batches /= batch_size
	60	+ n_valid_batches /= batch_size
	61	+ n_test_batches /= batch_size
	62	+
	63	+ print train_set_x.get_value(borrow=True).shape, train_set_y.get_value(borrow=True).shape
	64	+
	65	+ rng = np.random.RandomState(12306)
	66	+ index = T.lscalar() # index to a [mini]batch
	67	+ # start-snippet-1
	68	+ x = T.matrix('x') # the data is presented as rasterized images
	69	+ y = T.ivector('y') # the labels are presented as 1D vector of
	70	+ # [int] labels
	71	+
	72	+ ######################
	73	+ # BUILD ACTUAL MODEL #
	74	+ ######################
	75	+ print '... building the model'
	76	+
	77	+ layer0_input = x.reshape((batch_size, 1, 28, 28))
	78	+
	79	+ # Construct the first convolutional pooling layer:
	80	+ # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
	81	+ # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
	82	+ # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
	83	+ layer0 = ConvPoolLayer(
	84	+ rng,
	85	+ input=layer0_input,
	86	+ image_shape=(batch_size, 1, 28, 28),
	87	+ filter_shape=(nkerns[0], 1, 5, 5),
	88	+ poolsize=(2, 2)
	89	+ )
	90	+
	91	+ # Construct the second convolutional pooling layer
	92	+ # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
	93	+ # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
	94	+ # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
	95	+ layer1 = ConvPoolLayer(
	96	+ rng,
	97	+ input=layer0.output,
	98	+ image_shape=(batch_size, nkerns[0], 12, 12),
	99	+ filter_shape=(nkerns[1], nkerns[0], 5, 5),
	100	+ poolsize=(2, 2)
	101	+ )
	102	+
	103	+ # the HiddenLayer being fully-connected, it operates on 2D matrices of
	104	+ # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
	105	+ # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
	106	+ # or (400, 50 * 4 * 4) = (400, 800) with the default values.
	107	+ layer2_input = layer1.output.flatten(2)
	108	+
	109	+ # construct a fully-connected sigmoidal layer
	110	+ layer2 = HiddenLayer(
	111	+ rng,
	112	+ input=layer2_input,
	113	+ n_in=nkerns[1] * 4 * 4,
	114	+ n_out=500,
	115	+ activation=T.tanh
	116	+ )
	117	+
	118	+ # classify the values of the fully-connected sigmoidal layer
	119	+ layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
	120	+
	121	+ # the cost we minimize during training is the NLL of the model
	122	+ cost = layer3.negative_log_likelihood(y)
	123	+
	124	+ # create a function to compute the mistakes that are made by the model
	125	+ test_model = theano.function(
	126	+ [index],
	127	+ layer3.errors(y),
	128	+ givens={
	129	+ x: test_set_x[index * batch_size: (index + 1) * batch_size],
	130	+ y: test_set_y[index * batch_size: (index + 1) * batch_size]
	131	+ }
	132	+ )
	133	+
	134	+ validate_model = theano.function(
	135	+ [index],
	136	+ layer3.errors(y),
	137	+ givens={
	138	+ x: valid_set_x[index * batch_size: (index + 1) * batch_size],
	139	+ y: valid_set_y[index * batch_size: (index + 1) * batch_size]
	140	+ }
	141	+ )
	142	+
	143	+ # create a list of all model parameters to be fit by gradient descent
	144	+ params = layer3.params + layer2.params + layer1.params + layer0.params
	145	+
	146	+ # create a list of gradients for all model parameters
	147	+ grads = T.grad(cost, params)
	148	+
	149	+ # train_model is a function that updates the model parameters by
	150	+ # SGD. Since this model has many parameters, it would be tedious to
	151	+ # manually create an update rule for each model parameter. We thus
	152	+ # create the updates list by automatically looping over all
	153	+ # (params[i], grads[i]) pairs.
	154	+ updates = [
	155	+ (param_i, param_i - learning_rate * grad_i)
	156	+ for param_i, grad_i in zip(params, grads)
	157	+ ]
	158	+
	159	+ train_model = theano.function(
	160	+ [index],
	161	+ cost,
	162	+ updates=updates,
	163	+ givens={
	164	+ x: train_set_x[index * batch_size: (index + 1) * batch_size],
	165	+ y: train_set_y[index * batch_size: (index + 1) * batch_size]
	166	+ }
	167	+ )
	168	+ # end-snippet-1
	169	+
	170	+ ###############
	171	+ # TRAIN MODEL #
	172	+ ###############
	173	+ print '... training'
	174	+ # early-stopping parameters
	175	+ patience = 10000 # look at this many examples regardless
	176	+ patience_increase = 2 # wait this much longer when a new best is
	177	+ # found
	178	+ improvement_threshold = 0.995 # a relative improvement of this much is
	179	+ # considered significant
	180	+ validation_frequency = min(n_train_batches, patience / 2)
	181	+ # go through this many
	182	+ # minibatches before checking the network
	183	+ # on the validation set; in this case we
	184	+ # check every epoch
	185	+
	186	+ best_validation_loss = np.inf
	187	+ best_iter = 0
	188	+ test_score = 0.
	189	+ start_time = time.clock()
	190	+
	191	+ epoch = 0
	192	+ done_looping = False
	193	+
	194	+ while (epoch < n_epochs) and (not done_looping):
	195	+ epoch = epoch + 1
	196	+ for minibatch_index in xrange(n_train_batches):
	197	+
	198	+ iter = (epoch - 1) * n_train_batches + minibatch_index
	199	+
	200	+ if iter % 100 == 0:
	201	+ print 'training @ iter = ', iter
	202	+ cost_ij = train_model(minibatch_index)
	203	+
	204	+ if (iter + 1) % validation_frequency == 0:
	205	+
	206	+ # compute zero-one loss on validation set
	207	+ validation_losses = [validate_model(i) for i
	208	+ in xrange(n_valid_batches)]
	209	+ this_validation_loss = np.mean(validation_losses)
	210	+ print('epoch %i, minibatch %i/%i, validation error %f %%' %
	211	+ (epoch, minibatch_index + 1, n_train_batches,
	212	+ this_validation_loss * 100.))
	213	+
	214	+ # if we got the best validation score until now
	215	+ if this_validation_loss < best_validation_loss:
	216	+
	217	+ #improve patience if loss improvement is good enough
	218	+ if this_validation_loss < best_validation_loss * \
	219	+ improvement_threshold:
	220	+ patience = max(patience, iter * patience_increase)
	221	+
	222	+ # save best validation score and iteration number
	223	+ best_validation_loss = this_validation_loss
	224	+ best_iter = iter
	225	+
	226	+ # test it on the test set
	227	+ test_losses = [
	228	+ test_model(i)
	229	+ for i in xrange(n_test_batches)
	230	+ ]
	231	+ test_score = np.mean(test_losses)
	232	+ print((' epoch %i, minibatch %i/%i, test error of '
	233	+ 'best model %f %%') %
	234	+ (epoch, minibatch_index + 1, n_train_batches,
	235	+ test_score * 100.))
	236	+
	237	+ if patience <= iter:
	238	+ done_looping = True
	239	+ break
	240	+
	241	+ end_time = time.clock()
	242	+ print('Optimization complete.')
	243	+ print('Best validation score of %f %% obtained at iteration %i, '
	244	+ 'with test performance %f %%' %
	245	+ (best_validation_loss * 100., best_iter + 1, test_score * 100.))
	246	+ print >> sys.stderr, ('The code for file ' +
	247	+ os.path.split(__file__)[1] +
	248	+ ' ran for %.2fm' % ((end_time - start_time) / 60.))
	249	+
	250	+
	251	+
	252	+
	253	+
	254	+
	255	+
	256	+
72	257
73	258
74	259	def train(self, X, Y):
...	...
...	...	@@ -168,7 +168,7 @@ class ConvPoolLayer(object):
168	168	self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
169	169	self.params = [self.W, self.b]
170	170
171		-def _shared_dataset(data_xy, borrow=True):
	171	+def shared_dataset(data_xy, borrow=True):
172	172	""" Function that loads the dataset into shared variables
173	173
174	174	The reason we store our dataset in shared variables is to allow
...	...	@@ -208,8 +208,8 @@ def train_cnn_example(X=None, Y=None, dataset=os.path.join('', '../../res/', 'il
208	208	else:
209	209	X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=0)
210	210
211		- X_train, Y_train = _shared_dataset((X_train, Y_train))
212		- X_test, Y_test = _shared_dataset((X_test, Y_test))
	211	+ X_train, Y_train = shared_dataset((X_train, Y_train))
	212	+ X_test, Y_test = shared_dataset((X_test, Y_test))
213	213
214	214	# X_train = theano.shared(np.asarray(X_train, dtype=theano.config.floatX), borrow=True)
215	215	# Y_train = theano.shared(np.asarray(Y_train, dtype=theano.config.floatX), borrow=True)
...	...
...	...	@@ -149,6 +149,11 @@ def test_SVM_ILSVRC_S():
149	149	# test_SVM_ILSVRC_SPARK()
150	150
151	151
	152	+def test_THEANO_mnist():
	153	+ mtheano = THEANO.ModelTHEANO(toolset='cnn')
	154	+ mtheano._train_cnn(learning_rate=0.1, n_epochs=200, dataset=os.path.join(package_dir, '../res/', 'mnist.pkl.gz'), nkerns=[20, 50], batch_size=500)
	155	+
	156	+
152	157	def test_THEANO_crop():
153	158	timer.mark()
154	159	dilc = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_crop_pil')
...	...