Commit bde8352b4ba5a61935b4d7d0399c063df76951bb (1 parent: e6be6b61)
Exists in master and in 1 other branch
shuffling.
Showing 3 changed files with 43 additions and 25 deletions
Show diff stats
mdata/ILSVRC.py
| ... | ... | @@ -299,10 +299,10 @@ class DataILSVRC(DataDumperBase): |
| 299 | 299 | # if w < 300 or h < 300: |
| 300 | 300 | # continue |
| 301 | 301 | # left, upper = random.randint(0, w - 300), random.randint(0, h - 300) |
| 302 | - # img_crop = img[upper:upper + 300, left:left + 300] | |
| 303 | - # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop) | |
| 302 | + # img_crop = img[upper:upper + 300, left:left + 300] | |
| 303 | + # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop) | |
| 304 | 304 | # except Exception as e: |
| 305 | - # print '[EXCPT]', e | |
| 305 | + # print '[EXCPT]', e | |
| 306 | 306 | # pass |
| 307 | 307 | |
| 308 | 308 | |
| ... | ... | @@ -439,7 +439,7 @@ class DataILSVRC(DataDumperBase): |
| 439 | 439 | pass |
| 440 | 440 | |
| 441 | 441 | |
| 442 | - def load_data(self, mode='local', feattype='ibd', tagtype='class'): | |
| 442 | + def load_data(self, mode='local', feattype='ibd', tagtype='class', shuffle=False): | |
| 443 | 443 | INDEX = [] |
| 444 | 444 | X = [] |
| 445 | 445 | Y = [] |
| ... | ... | @@ -461,7 +461,8 @@ class DataILSVRC(DataDumperBase): |
| 461 | 461 | |
| 462 | 462 | for tag, feat in dict_dataset.values(): |
| 463 | 463 | feat.ravel()[[i * 200 + j for i in range(0, 200, 8) for j in range(0, 200, 8)]] = 0 |
| 464 | - # feat = np.bitwise_and(feat, 1) | |
| 464 | + feat = np.absolute(feat) | |
| 465 | + feat = np.bitwise_and(feat, 1) | |
| 465 | 466 | X.append(feat.ravel()) |
| 466 | 467 | Y.append(int(tag)) |
| 467 | 468 | |
| ... | ... | @@ -503,8 +504,13 @@ class DataILSVRC(DataDumperBase): |
| 503 | 504 | else: |
| 504 | 505 | raise Exception("Unknown mode!") |
| 505 | 506 | |
| 506 | - return X, Y | |
| 507 | + if shuffle: | |
| 508 | + # shuffling | |
| 509 | + Z = zip(X, Y) | |
| 510 | + np.random.shuffle(Z) | |
| 511 | + return Z | |
| 507 | 512 | |
| 513 | + return X, Y | |
| 508 | 514 | |
| 509 | 515 | |
| 510 | 516 | ... | ... |
mmodel/caffe/helper.py
| ... | ... | @@ -61,23 +61,37 @@ def _write_lmdb_raw(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='.. |
| 61 | 61 | in_db_data.close() |
| 62 | 62 | |
| 63 | 63 | |
| 64 | -def write_lmdb(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb'): | |
| 64 | +def write_lmdb(X, Y=None, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb'): | |
| 65 | 65 | """ |
| 66 | 66 | X - numpy array of data. |
| 67 | 67 | Y - numpy array of labels. |
| 68 | 68 | """ |
| 69 | - print('writing image data...') | |
| 70 | - for idx in range(int(math.ceil(len(Y) / 1000.0))): | |
| 71 | - in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12)) | |
| 72 | - with in_db_data.begin(write=True) as in_txn: | |
| 73 | - for in_idx, (in_, label_) in enumerate( | |
| 74 | - zip(X[(1000 * idx):(1000 * (idx + 1))], Y[(1000 * idx):(1000 * (idx + 1))])): | |
| 75 | - # im = caffe.io.load_image(in_) | |
| 76 | - im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_) | |
| 77 | - in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString()) | |
| 78 | - | |
| 79 | - print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X)) | |
| 80 | - in_db_data.close() | |
| 69 | + if Y != None: | |
| 70 | + print('writing image data...') | |
| 71 | + for idx in range(int(math.ceil(len(Y) / 1000.0))): | |
| 72 | + in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12)) | |
| 73 | + with in_db_data.begin(write=True) as in_txn: | |
| 74 | + for in_idx, (in_, label_) in enumerate( | |
| 75 | + zip(X[(1000 * idx):(1000 * (idx + 1))], Y[(1000 * idx):(1000 * (idx + 1))])): | |
| 76 | + # im = caffe.io.load_image(in_) | |
| 77 | + im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_) | |
| 78 | + in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString()) | |
| 79 | + | |
| 80 | + print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X)) | |
| 81 | + in_db_data.close() | |
| 82 | + else: | |
| 83 | + assert isinstance(X[0], tuple) | |
| 84 | + print('writing image data...') | |
| 85 | + for idx in range(int(math.ceil(len(X) / 1000.0))): | |
| 86 | + in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12)) | |
| 87 | + with in_db_data.begin(write=True) as in_txn: | |
| 88 | + for in_idx, (in_, label_) in enumerate(X[(1000 * idx):(1000 * (idx + 1))]): | |
| 89 | + # im = caffe.io.load_image(in_) | |
| 90 | + im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_) | |
| 91 | + in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString()) | |
| 92 | + | |
| 93 | + print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X)) | |
| 94 | + in_db_data.close() | |
| 81 | 95 | |
| 82 | 96 | |
| 83 | 97 | if __name__ == '__main__': | ... | ... |
test/test_data.py
| ... | ... | @@ -6,6 +6,7 @@ from ..mdata import MSR, CV, ILSVRC, ILSVRC_S, crop |
| 6 | 6 | |
| 7 | 7 | from ..mmodel.caffe.helper import * |
| 8 | 8 | |
| 9 | + | |
| 9 | 10 | def test_MSR(): |
| 10 | 11 | dmsr = MSR.DataMSR() |
| 11 | 12 | # msrd.format() |
| ... | ... | @@ -164,14 +165,11 @@ def test_caffe(): |
| 164 | 165 | # return |
| 165 | 166 | |
| 166 | 167 | dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Train_5000_crop_pil') |
| 167 | - X, Y = dil.load_data(mode='local', feattype='coef') | |
| 168 | + X = dil.load_data(mode='local', feattype='coef', shuffle=True) | |
| 168 | 169 | print X[0] |
| 169 | - print Y | |
| 170 | - print np.array(X).shape, np.array(Y).shape | |
| 171 | - | |
| 172 | - write_lmdb(X[2000:3000],Y[2000:3000]) | |
| 173 | - | |
| 170 | + print np.array(X).shape | |
| 174 | 171 | |
| 172 | + write_lmdb(X[7000:]) | |
| 175 | 173 | |
| 176 | 174 | |
| 177 | 175 | if __name__ == '__main__': | ... | ... |