Commit bde8352b4ba5a61935b4d7d0399c063df76951bb

Authored by Chunk
1 parent e6be6b61
Exists in master and in 1 other branch refactor

shuffling.

mdata/ILSVRC.py
@@ -299,10 +299,10 @@ class DataILSVRC(DataDumperBase): @@ -299,10 +299,10 @@ class DataILSVRC(DataDumperBase):
299 # if w < 300 or h < 300: 299 # if w < 300 or h < 300:
300 # continue 300 # continue
301 # left, upper = random.randint(0, w - 300), random.randint(0, h - 300) 301 # left, upper = random.randint(0, w - 300), random.randint(0, h - 300)
302 - # img_crop = img[upper:upper + 300, left:left + 300]  
303 - # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop) 302 + # img_crop = img[upper:upper + 300, left:left + 300]
  303 + # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop)
304 # except Exception as e: 304 # except Exception as e:
305 - # print '[EXCPT]', e 305 + # print '[EXCPT]', e
306 # pass 306 # pass
307 307
308 308
@@ -439,7 +439,7 @@ class DataILSVRC(DataDumperBase): @@ -439,7 +439,7 @@ class DataILSVRC(DataDumperBase):
439 pass 439 pass
440 440
441 441
442 - def load_data(self, mode='local', feattype='ibd', tagtype='class'): 442 + def load_data(self, mode='local', feattype='ibd', tagtype='class', shuffle=False):
443 INDEX = [] 443 INDEX = []
444 X = [] 444 X = []
445 Y = [] 445 Y = []
@@ -461,7 +461,8 @@ class DataILSVRC(DataDumperBase): @@ -461,7 +461,8 @@ class DataILSVRC(DataDumperBase):
461 461
462 for tag, feat in dict_dataset.values(): 462 for tag, feat in dict_dataset.values():
463 feat.ravel()[[i * 200 + j for i in range(0, 200, 8) for j in range(0, 200, 8)]] = 0 463 feat.ravel()[[i * 200 + j for i in range(0, 200, 8) for j in range(0, 200, 8)]] = 0
464 - # feat = np.bitwise_and(feat, 1) 464 + feat = np.absolute(feat)
  465 + feat = np.bitwise_and(feat, 1)
465 X.append(feat.ravel()) 466 X.append(feat.ravel())
466 Y.append(int(tag)) 467 Y.append(int(tag))
467 468
@@ -503,8 +504,13 @@ class DataILSVRC(DataDumperBase): @@ -503,8 +504,13 @@ class DataILSVRC(DataDumperBase):
503 else: 504 else:
504 raise Exception("Unknown mode!") 505 raise Exception("Unknown mode!")
505 506
506 - return X, Y 507 + if shuffle:
  508 + # shuffling
  509 + Z = zip(X, Y)
  510 + np.random.shuffle(Z)
  511 + return Z
507 512
  513 + return X, Y
508 514
509 515
510 516
mmodel/caffe/helper.py
@@ -61,23 +61,37 @@ def _write_lmdb_raw(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='.. @@ -61,23 +61,37 @@ def _write_lmdb_raw(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='..
61 in_db_data.close() 61 in_db_data.close()
62 62
63 63
64 -def write_lmdb(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb'): 64 +def write_lmdb(X, Y=None, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb'):
65 """ 65 """
66 X - numpy array of data. 66 X - numpy array of data.
67 Y - numpy array of labels. 67 Y - numpy array of labels.
68 """ 68 """
69 - print('writing image data...')  
70 - for idx in range(int(math.ceil(len(Y) / 1000.0))):  
71 - in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12))  
72 - with in_db_data.begin(write=True) as in_txn:  
73 - for in_idx, (in_, label_) in enumerate(  
74 - zip(X[(1000 * idx):(1000 * (idx + 1))], Y[(1000 * idx):(1000 * (idx + 1))])):  
75 - # im = caffe.io.load_image(in_)  
76 - im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_)  
77 - in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString())  
78 -  
79 - print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X))  
80 - in_db_data.close() 69 + if Y != None:
  70 + print('writing image data...')
  71 + for idx in range(int(math.ceil(len(Y) / 1000.0))):
  72 + in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12))
  73 + with in_db_data.begin(write=True) as in_txn:
  74 + for in_idx, (in_, label_) in enumerate(
  75 + zip(X[(1000 * idx):(1000 * (idx + 1))], Y[(1000 * idx):(1000 * (idx + 1))])):
  76 + # im = caffe.io.load_image(in_)
  77 + im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_)
  78 + in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString())
  79 +
  80 + print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X))
  81 + in_db_data.close()
  82 + else:
  83 + assert isinstance(X[0], tuple)
  84 + print('writing image data...')
  85 + for idx in range(int(math.ceil(len(X) / 1000.0))):
  86 + in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12))
  87 + with in_db_data.begin(write=True) as in_txn:
  88 + for in_idx, (in_, label_) in enumerate(X[(1000 * idx):(1000 * (idx + 1))]):
  89 + # im = caffe.io.load_image(in_)
  90 + im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_)
  91 + in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString())
  92 +
  93 + print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X))
  94 + in_db_data.close()
81 95
82 96
83 if __name__ == '__main__': 97 if __name__ == '__main__':
test/test_data.py
@@ -6,6 +6,7 @@ from ..mdata import MSR, CV, ILSVRC, ILSVRC_S, crop @@ -6,6 +6,7 @@ from ..mdata import MSR, CV, ILSVRC, ILSVRC_S, crop
6 6
7 from ..mmodel.caffe.helper import * 7 from ..mmodel.caffe.helper import *
8 8
  9 +
9 def test_MSR(): 10 def test_MSR():
10 dmsr = MSR.DataMSR() 11 dmsr = MSR.DataMSR()
11 # msrd.format() 12 # msrd.format()
@@ -164,14 +165,11 @@ def test_caffe(): @@ -164,14 +165,11 @@ def test_caffe():
164 # return 165 # return
165 166
166 dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Train_5000_crop_pil') 167 dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Train_5000_crop_pil')
167 - X, Y = dil.load_data(mode='local', feattype='coef') 168 + X = dil.load_data(mode='local', feattype='coef', shuffle=True)
168 print X[0] 169 print X[0]
169 - print Y  
170 - print np.array(X).shape, np.array(Y).shape  
171 -  
172 - write_lmdb(X[2000:3000],Y[2000:3000])  
173 - 170 + print np.array(X).shape
174 171
  172 + write_lmdb(X[7000:])
175 173
176 174
177 if __name__ == '__main__': 175 if __name__ == '__main__':