Commit bde8352b4ba5a61935b4d7d0399c063df76951bb
1 parent: e6be6b61
Exists in master and in 1 other branch
shuffling.
Showing 3 changed files with 43 additions and 25 deletions
Show diff stats
mdata/ILSVRC.py
@@ -299,10 +299,10 @@ class DataILSVRC(DataDumperBase): | @@ -299,10 +299,10 @@ class DataILSVRC(DataDumperBase): | ||
299 | # if w < 300 or h < 300: | 299 | # if w < 300 or h < 300: |
300 | # continue | 300 | # continue |
301 | # left, upper = random.randint(0, w - 300), random.randint(0, h - 300) | 301 | # left, upper = random.randint(0, w - 300), random.randint(0, h - 300) |
302 | - # img_crop = img[upper:upper + 300, left:left + 300] | ||
303 | - # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop) | 302 | + # img_crop = img[upper:upper + 300, left:left + 300] |
303 | + # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop) | ||
304 | # except Exception as e: | 304 | # except Exception as e: |
305 | - # print '[EXCPT]', e | 305 | + # print '[EXCPT]', e |
306 | # pass | 306 | # pass |
307 | 307 | ||
308 | 308 | ||
@@ -439,7 +439,7 @@ class DataILSVRC(DataDumperBase): | @@ -439,7 +439,7 @@ class DataILSVRC(DataDumperBase): | ||
439 | pass | 439 | pass |
440 | 440 | ||
441 | 441 | ||
442 | - def load_data(self, mode='local', feattype='ibd', tagtype='class'): | 442 | + def load_data(self, mode='local', feattype='ibd', tagtype='class', shuffle=False): |
443 | INDEX = [] | 443 | INDEX = [] |
444 | X = [] | 444 | X = [] |
445 | Y = [] | 445 | Y = [] |
@@ -461,7 +461,8 @@ class DataILSVRC(DataDumperBase): | @@ -461,7 +461,8 @@ class DataILSVRC(DataDumperBase): | ||
461 | 461 | ||
462 | for tag, feat in dict_dataset.values(): | 462 | for tag, feat in dict_dataset.values(): |
463 | feat.ravel()[[i * 200 + j for i in range(0, 200, 8) for j in range(0, 200, 8)]] = 0 | 463 | feat.ravel()[[i * 200 + j for i in range(0, 200, 8) for j in range(0, 200, 8)]] = 0 |
464 | - # feat = np.bitwise_and(feat, 1) | 464 | + feat = np.absolute(feat) |
465 | + feat = np.bitwise_and(feat, 1) | ||
465 | X.append(feat.ravel()) | 466 | X.append(feat.ravel()) |
466 | Y.append(int(tag)) | 467 | Y.append(int(tag)) |
467 | 468 | ||
@@ -503,8 +504,13 @@ class DataILSVRC(DataDumperBase): | @@ -503,8 +504,13 @@ class DataILSVRC(DataDumperBase): | ||
503 | else: | 504 | else: |
504 | raise Exception("Unknown mode!") | 505 | raise Exception("Unknown mode!") |
505 | 506 | ||
506 | - return X, Y | 507 | + if shuffle: |
508 | + # shuffling | ||
509 | + Z = zip(X, Y) | ||
510 | + np.random.shuffle(Z) | ||
511 | + return Z | ||
507 | 512 | ||
513 | + return X, Y | ||
508 | 514 | ||
509 | 515 | ||
510 | 516 |
mmodel/caffe/helper.py
@@ -61,23 +61,37 @@ def _write_lmdb_raw(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='.. | @@ -61,23 +61,37 @@ def _write_lmdb_raw(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='.. | ||
61 | in_db_data.close() | 61 | in_db_data.close() |
62 | 62 | ||
63 | 63 | ||
64 | -def write_lmdb(X, Y, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb'): | 64 | +def write_lmdb(X, Y=None, lmdb_name_data='../res/data_lmdb', lmdb_name_label='../res/label_lmdb'): |
65 | """ | 65 | """ |
66 | X - numpy array of data. | 66 | X - numpy array of data. |
67 | Y - numpy array of labels. | 67 | Y - numpy array of labels. |
68 | """ | 68 | """ |
69 | - print('writing image data...') | ||
70 | - for idx in range(int(math.ceil(len(Y) / 1000.0))): | ||
71 | - in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12)) | ||
72 | - with in_db_data.begin(write=True) as in_txn: | ||
73 | - for in_idx, (in_, label_) in enumerate( | ||
74 | - zip(X[(1000 * idx):(1000 * (idx + 1))], Y[(1000 * idx):(1000 * (idx + 1))])): | ||
75 | - # im = caffe.io.load_image(in_) | ||
76 | - im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_) | ||
77 | - in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString()) | ||
78 | - | ||
79 | - print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X)) | ||
80 | - in_db_data.close() | 69 | + if Y != None: |
70 | + print('writing image data...') | ||
71 | + for idx in range(int(math.ceil(len(Y) / 1000.0))): | ||
72 | + in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12)) | ||
73 | + with in_db_data.begin(write=True) as in_txn: | ||
74 | + for in_idx, (in_, label_) in enumerate( | ||
75 | + zip(X[(1000 * idx):(1000 * (idx + 1))], Y[(1000 * idx):(1000 * (idx + 1))])): | ||
76 | + # im = caffe.io.load_image(in_) | ||
77 | + im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_) | ||
78 | + in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString()) | ||
79 | + | ||
80 | + print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X)) | ||
81 | + in_db_data.close() | ||
82 | + else: | ||
83 | + assert isinstance(X[0], tuple) | ||
84 | + print('writing image data...') | ||
85 | + for idx in range(int(math.ceil(len(X) / 1000.0))): | ||
86 | + in_db_data = lmdb.open(lmdb_name_data, map_size=int(1e12)) | ||
87 | + with in_db_data.begin(write=True) as in_txn: | ||
88 | + for in_idx, (in_, label_) in enumerate(X[(1000 * idx):(1000 * (idx + 1))]): | ||
89 | + # im = caffe.io.load_image(in_) | ||
90 | + im_dat = caffe.io.array_to_datum(np.array(in_, dtype=int).reshape(1, 200, 200), label_) | ||
91 | + in_txn.put('{:0>10d}'.format(1000 * idx + in_idx), im_dat.SerializeToString()) | ||
92 | + | ||
93 | + print str(1000 * idx + in_idx + 1) + ' / ' + str(len(X)) | ||
94 | + in_db_data.close() | ||
81 | 95 | ||
82 | 96 | ||
83 | if __name__ == '__main__': | 97 | if __name__ == '__main__': |
test/test_data.py
@@ -6,6 +6,7 @@ from ..mdata import MSR, CV, ILSVRC, ILSVRC_S, crop | @@ -6,6 +6,7 @@ from ..mdata import MSR, CV, ILSVRC, ILSVRC_S, crop | ||
6 | 6 | ||
7 | from ..mmodel.caffe.helper import * | 7 | from ..mmodel.caffe.helper import * |
8 | 8 | ||
9 | + | ||
9 | def test_MSR(): | 10 | def test_MSR(): |
10 | dmsr = MSR.DataMSR() | 11 | dmsr = MSR.DataMSR() |
11 | # msrd.format() | 12 | # msrd.format() |
@@ -164,14 +165,11 @@ def test_caffe(): | @@ -164,14 +165,11 @@ def test_caffe(): | ||
164 | # return | 165 | # return |
165 | 166 | ||
166 | dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Train_5000_crop_pil') | 167 | dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Train_5000_crop_pil') |
167 | - X, Y = dil.load_data(mode='local', feattype='coef') | 168 | + X = dil.load_data(mode='local', feattype='coef', shuffle=True) |
168 | print X[0] | 169 | print X[0] |
169 | - print Y | ||
170 | - print np.array(X).shape, np.array(Y).shape | ||
171 | - | ||
172 | - write_lmdb(X[2000:3000],Y[2000:3000]) | ||
173 | - | 170 | + print np.array(X).shape |
174 | 171 | ||
172 | + write_lmdb(X[7000:]) | ||
175 | 173 | ||
176 | 174 | ||
177 | if __name__ == '__main__': | 175 | if __name__ == '__main__': |