Commit bde8352b4ba5a61935b4d7d0399c063df76951bb
1 parent
e6be6b61
Exists in
master
and in
1 other branch
Shuffling: add an optional shuffle flag to load_data and let write_lmdb accept zipped (X, Y) pairs.
Showing
3 changed files
with
43 additions
and
25 deletions
Show diff stats
mdata/ILSVRC.py
... | ... | @@ -299,10 +299,10 @@ class DataILSVRC(DataDumperBase): |
299 | 299 | # if w < 300 or h < 300: |
300 | 300 | # continue |
301 | 301 | # left, upper = random.randint(0, w - 300), random.randint(0, h - 300) |
302 | - # img_crop = img[upper:upper + 300, left:left + 300] | |
303 | - # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop) | |
302 | + # img_crop = img[upper:upper + 300, left:left + 300] | |
303 | + # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop) | |
304 | 304 | # except Exception as e: |
305 | - # print '[EXCPT]', e | |
305 | + # print '[EXCPT]', e | |
306 | 306 | # pass |
307 | 307 | |
308 | 308 | |
... | ... | @@ -439,7 +439,7 @@ class DataILSVRC(DataDumperBase): |
439 | 439 | pass |
440 | 440 | |
441 | 441 | |
442 | - def load_data(self, mode='local', feattype='ibd', tagtype='class'): | |
442 | + def load_data(self, mode='local', feattype='ibd', tagtype='class', shuffle=False): | |
443 | 443 | INDEX = [] |
444 | 444 | X = [] |
445 | 445 | Y = [] |
... | ... | @@ -461,7 +461,8 @@ class DataILSVRC(DataDumperBase): |
461 | 461 | |
462 | 462 | for tag, feat in dict_dataset.values(): |
463 | 463 | feat.ravel()[[i * 200 + j for i in range(0, 200, 8) for j in range(0, 200, 8)]] = 0 |
464 | - # feat = np.bitwise_and(feat, 1) | |
464 | + feat = np.absolute(feat) | |
465 | + feat = np.bitwise_and(feat, 1) | |
465 | 466 | X.append(feat.ravel()) |
466 | 467 | Y.append(int(tag)) |
467 | 468 | |
... | ... | @@ -503,8 +504,13 @@ class DataILSVRC(DataDumperBase): |
503 | 504 | else: |
504 | 505 | raise Exception("Unknown mode!") |
505 | 506 | |
506 | - return X, Y | |
507 | + if shuffle: | |
508 | + # shuffling | |
509 | + Z = zip(X, Y) | |
510 | + np.random.shuffle(Z) | |
511 | + return Z | |
507 | 512 | |
513 | + return X, Y | |
508 | 514 | |
509 | 515 | |
510 | 516 | ... | ... |
mmodel/caffe/helper.py
... | ... | @@ -61,23 +61,37 @@ def _write_lmdb_raw(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='.. |
61 | 61 | in_db_data.close() |
62 | 62 | |
63 | 63 | |
64 | -def write_lmdb(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb'): | |
64 | +def write_lmdb(X, Y=None, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb'): | |
65 | 65 | """ |
66 | 66 | X - numpy array of data. |
67 | 67 | Y - numpy array of labels. |
68 | 68 | """ |
69 | - print('writing image data...') | |
70 | - for idx in range(int(math.ceil(len(Y) / 1000.0))): | |
71 | - in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12)) | |
72 | - with in_db_data.begin(write=True) as in_txn: | |
73 | - for in_idx, (in_, label_) in enumerate( | |
74 | - zip(X[(1000 * idx):(1000 * (idx + 1))], Y[(1000 * idx):(1000 * (idx + 1))])): | |
75 | - # im = caffe.io.load_image(in_) | |
76 | - im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_) | |
77 | - in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString()) | |
78 | - | |
79 | - print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X)) | |
80 | - in_db_data.close() | |
69 | + if Y != None: | |
70 | + print('writing image data...') | |
71 | + for idx in range(int(math.ceil(len(Y) / 1000.0))): | |
72 | + in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12)) | |
73 | + with in_db_data.begin(write=True) as in_txn: | |
74 | + for in_idx, (in_, label_) in enumerate( | |
75 | + zip(X[(1000 * idx):(1000 * (idx + 1))], Y[(1000 * idx):(1000 * (idx + 1))])): | |
76 | + # im = caffe.io.load_image(in_) | |
77 | + im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_) | |
78 | + in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString()) | |
79 | + | |
80 | + print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X)) | |
81 | + in_db_data.close() | |
82 | + else: | |
83 | + assert isinstance(X[0], tuple) | |
84 | + print('writing image data...') | |
85 | + for idx in range(int(math.ceil(len(X) / 1000.0))): | |
86 | + in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12)) | |
87 | + with in_db_data.begin(write=True) as in_txn: | |
88 | + for in_idx, (in_, label_) in enumerate(X[(1000 * idx):(1000 * (idx + 1))]): | |
89 | + # im = caffe.io.load_image(in_) | |
90 | + im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_) | |
91 | + in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString()) | |
92 | + | |
93 | + print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X)) | |
94 | + in_db_data.close() | |
81 | 95 | |
82 | 96 | |
83 | 97 | if __name__ == '__main__': | ... | ... |
test/test_data.py
... | ... | @@ -6,6 +6,7 @@ from ..mdata import MSR, CV, ILSVRC, ILSVRC_S, crop |
6 | 6 | |
7 | 7 | from ..mmodel.caffe.helper import * |
8 | 8 | |
9 | + | |
9 | 10 | def test_MSR(): |
10 | 11 | dmsr = MSR.DataMSR() |
11 | 12 | # msrd.format() |
... | ... | @@ -164,14 +165,11 @@ def test_caffe(): |
164 | 165 | # return |
165 | 166 | |
166 | 167 | dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Train_5000_crop_pil') |
167 | - X, Y = dil.load_data(mode='local', feattype='coef') | |
168 | + X = dil.load_data(mode='local', feattype='coef', shuffle=True) | |
168 | 169 | print X[0] |
169 | - print Y | |
170 | - print np.array(X).shape, np.array(Y).shape | |
171 | - | |
172 | - write_lmdb(X[2000:3000],Y[2000:3000]) | |
173 | - | |
170 | + print np.array(X).shape | |
174 | 171 | |
172 | + write_lmdb(X[7000:]) | |
175 | 173 | |
176 | 174 | |
177 | 175 | if __name__ == '__main__': | ... | ... |