Commit bde8352b4ba5a61935b4d7d0399c063df76951bb

Authored by Chunk
1 parent e6be6b61
Exists in master and in 1 other branch refactor

shuffling.

mdata/ILSVRC.py
... ... @@ -299,10 +299,10 @@ class DataILSVRC(DataDumperBase):
299 299 # if w < 300 or h < 300:
300 300 # continue
301 301 # left, upper = random.randint(0, w - 300), random.randint(0, h - 300)
302   - # img_crop = img[upper:upper + 300, left:left + 300]
303   - # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop)
  302 + # img_crop = img[upper:upper + 300, left:left + 300]
  303 + # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop)
304 304 # except Exception as e:
305   - # print '[EXCPT]', e
  305 + # print '[EXCPT]', e
306 306 # pass
307 307  
308 308  
... ... @@ -439,7 +439,7 @@ class DataILSVRC(DataDumperBase):
439 439 pass
440 440  
441 441  
442   - def load_data(self, mode='local', feattype='ibd', tagtype='class'):
  442 + def load_data(self, mode='local', feattype='ibd', tagtype='class', shuffle=False):
443 443 INDEX = []
444 444 X = []
445 445 Y = []
... ... @@ -461,7 +461,8 @@ class DataILSVRC(DataDumperBase):
461 461  
462 462 for tag, feat in dict_dataset.values():
463 463 feat.ravel()[[i * 200 + j for i in range(0, 200, 8) for j in range(0, 200, 8)]] = 0
464   - # feat = np.bitwise_and(feat, 1)
  464 + feat = np.absolute(feat)
  465 + feat = np.bitwise_and(feat, 1)
465 466 X.append(feat.ravel())
466 467 Y.append(int(tag))
467 468  
... ... @@ -503,8 +504,13 @@ class DataILSVRC(DataDumperBase):
503 504 else:
504 505 raise Exception("Unknown mode!")
505 506  
506   - return X, Y
  507 + if shuffle:
  508 + # shuffling
  509 + Z = zip(X, Y)
  510 + np.random.shuffle(Z)
  511 + return Z
507 512  
  513 + return X, Y
508 514  
509 515  
510 516  
... ...
mmodel/caffe/helper.py
... ... @@ -61,23 +61,37 @@ def _write_lmdb_raw(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='..
61 61 in_db_data.close()
62 62  
63 63  
64   -def write_lmdb(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb'):
  64 +def write_lmdb(X, Y=None, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb'):
65 65 """
66 66 X - numpy array of data.
67 67 Y - numpy array of labels.
68 68 """
69   - print('writing image data...')
70   - for idx in range(int(math.ceil(len(Y) / 1000.0))):
71   - in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12))
72   - with in_db_data.begin(write=True) as in_txn:
73   - for in_idx, (in_, label_) in enumerate(
74   - zip(X[(1000 * idx):(1000 * (idx + 1))], Y[(1000 * idx):(1000 * (idx + 1))])):
75   - # im = caffe.io.load_image(in_)
76   - im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_)
77   - in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString())
78   -
79   - print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X))
80   - in_db_data.close()
  69 + if Y != None:
  70 + print('writing image data...')
  71 + for idx in range(int(math.ceil(len(Y) / 1000.0))):
  72 + in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12))
  73 + with in_db_data.begin(write=True) as in_txn:
  74 + for in_idx, (in_, label_) in enumerate(
  75 + zip(X[(1000 * idx):(1000 * (idx + 1))], Y[(1000 * idx):(1000 * (idx + 1))])):
  76 + # im = caffe.io.load_image(in_)
  77 + im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_)
  78 + in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString())
  79 +
  80 + print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X))
  81 + in_db_data.close()
  82 + else:
  83 + assert isinstance(X[0], tuple)
  84 + print('writing image data...')
  85 + for idx in range(int(math.ceil(len(X) / 1000.0))):
  86 + in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12))
  87 + with in_db_data.begin(write=True) as in_txn:
  88 + for in_idx, (in_, label_) in enumerate(X[(1000 * idx):(1000 * (idx + 1))]):
  89 + # im = caffe.io.load_image(in_)
  90 + im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_)
  91 + in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString())
  92 +
  93 + print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X))
  94 + in_db_data.close()
81 95  
82 96  
83 97 if __name__ == '__main__':
... ...
test/test_data.py
... ... @@ -6,6 +6,7 @@ from ..mdata import MSR, CV, ILSVRC, ILSVRC_S, crop
6 6  
7 7 from ..mmodel.caffe.helper import *
8 8  
  9 +
9 10 def test_MSR():
10 11 dmsr = MSR.DataMSR()
11 12 # msrd.format()
... ... @@ -164,14 +165,11 @@ def test_caffe():
164 165 # return
165 166  
166 167 dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Train_5000_crop_pil')
167   - X, Y = dil.load_data(mode='local', feattype='coef')
  168 + X = dil.load_data(mode='local', feattype='coef', shuffle=True)
168 169 print X[0]
169   - print Y
170   - print np.array(X).shape, np.array(Y).shape
171   -
172   - write_lmdb(X[2000:3000],Y[2000:3000])
173   -
  170 + print np.array(X).shape
174 171  
  172 + write_lmdb(X[7000:])
175 173  
176 174  
177 175 if __name__ == '__main__':
... ...