Commit ec755e37654e61074fe94b3fb10932bb14c0d856
1 parent
2fe06b7f
Exists in
master
and in
1 other branch
cropping.
Showing
2 changed files
with
74 additions
and
14 deletions
Show diff stats
mdata/ILSVRC.py
... | ... | @@ -22,6 +22,7 @@ import numpy as np |
22 | 22 | from numpy.random import randn |
23 | 23 | import pandas as pd |
24 | 24 | from scipy import stats |
25 | +import random | |
25 | 26 | |
26 | 27 | from subprocess import Popen, PIPE, STDOUT |
27 | 28 | |
... | ... | @@ -268,6 +269,39 @@ class DataILSVRC(DataDumperBase): |
268 | 269 | def embed(self, rate=None): |
269 | 270 | self._embed_inner(rate) |
270 | 271 | |
272 | + | |
273 | + def crop(self, size=(300, 300)): | |
274 | + for path, subdirs, files in os.walk(self.data_dir): | |
275 | + for name in files: | |
276 | + image = os.path.join(path, name) | |
277 | + print image | |
278 | + | |
279 | + W, H = size | |
280 | + try: | |
281 | + im = Image.open(image) | |
282 | + w, h = im.size | |
283 | + if w < W or h < H: | |
284 | + continue | |
285 | + left, upper = random.randint(0, w - W), random.randint(0, h - H) | |
286 | + im = im.crop((left, upper, left + W, upper + H)) | |
287 | + im.save(os.path.join(self.data_dir + '_crop_pil', name)) | |
288 | + except Exception as e: | |
289 | + print '[EXCPT]', e | |
290 | + pass | |
291 | + | |
292 | + # try: | |
293 | + # img = cv2.imread(image, cv2.CV_LOAD_IMAGE_UNCHANGED) | |
294 | + # h, w = img.shape[:2] | |
295 | + # if w < 300 or h < 300: | |
296 | + # continue | |
297 | + # left, upper = random.randint(0, w - 300), random.randint(0, h - 300) | |
298 | + # img_crop = img[upper:upper + 300, left:left + 300] | |
299 | + # cv2.imwrite(os.path.join(base_dir, category + '_crop_cv', name), img_crop) | |
300 | + # except Exception as e: | |
301 | + # print '[EXCPT]', e | |
302 | + # pass | |
303 | + | |
304 | + | |
271 | 305 | def get_table(self): |
272 | 306 | if self.table != None: |
273 | 307 | return self.table |
... | ... | @@ -410,30 +444,44 @@ class DataILSVRC(DataDumperBase): |
410 | 444 | |
411 | 445 | dict_dataset = {} |
412 | 446 | |
413 | - with open(self.list_file, 'rb') as tsvfile: | |
414 | - tsvfile = csv.reader(tsvfile, delimiter='\t') | |
415 | - for line in tsvfile: | |
416 | - hash = line[0] | |
417 | - tag = line[-1] | |
418 | - path_feat = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.' + feattype) | |
419 | - if path_feat: | |
420 | - with open(path_feat, 'rb') as featfile: | |
421 | - dict_dataset[hash] = (tag, json.loads(featfile.read())) | |
447 | + if feattype == 'coef': # raw | |
448 | + with open(self.list_file, 'rb') as tsvfile: | |
449 | + tsvfile = csv.reader(tsvfile, delimiter='\t') | |
450 | + for line in tsvfile: | |
451 | + hash = line[0] | |
452 | + tag = line[-1] | |
453 | + image = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.jpg') | |
454 | + if image: | |
455 | + im = Jpeg(image, key=sample_key) | |
456 | + dict_dataset[hash] = (tag, im.getCoefBlocks('Y')) | |
457 | + | |
458 | + else: | |
459 | + with open(self.list_file, 'rb') as tsvfile: | |
460 | + tsvfile = csv.reader(tsvfile, delimiter='\t') | |
461 | + for line in tsvfile: | |
462 | + hash = line[0] | |
463 | + tag = line[-1] | |
464 | + path_feat = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.' + feattype) | |
465 | + if path_feat: | |
466 | + with open(path_feat, 'rb') as featfile: | |
467 | + dict_dataset[hash] = (tag, json.loads(featfile.read())) | |
422 | 468 | |
423 | 469 | for tag, feat in dict_dataset.values(): |
424 | - X.append([item for sublist in feat for subsublist in sublist for item in subsublist]) | |
470 | + # X.append([item for sublist in feat for subsublist in sublist for item in subsublist]) | |
471 | + X.append(np.array(feat).ravel().tolist()) | |
425 | 472 | Y.append(int(tag)) |
426 | 473 | |
427 | - elif mode == "remote" or mode == "hbase": | |
474 | + elif mode == "hbase": # remote | |
428 | 475 | if self.table == None: |
429 | 476 | self.table = self.get_table() |
430 | 477 | |
431 | 478 | col_feat, col_tag = 'cf_feat:' + feattype, 'cf_tag:' + tagtype |
432 | 479 | for key, data in self.table.scan(columns=[col_feat, col_tag]): |
433 | - X.append([item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist]) | |
480 | + X.append( | |
481 | + [item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist]) | |
434 | 482 | Y.append(int(data[col_tag])) |
435 | 483 | |
436 | - elif mode == "spark" or mode == "cluster": | |
484 | + elif mode == "spark": # cluster | |
437 | 485 | if self.sparker == None: |
438 | 486 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077') |
439 | 487 | ... | ... |
test/test_data.py
... | ... | @@ -135,7 +135,19 @@ def test_pipeline(): |
135 | 135 | |
136 | 136 | |
137 | 137 | def test_crop(): |
138 | - crop.crop_Test() | |
138 | + # crop.crop_Test() | |
139 | + dil = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_1') | |
140 | + dil.crop() | |
141 | + | |
142 | + dil2 = ILSVRC.DataILSVRC(base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val', category='Test_1_crop_pil') | |
143 | + | |
144 | + dil.format() | |
145 | + dil.embed(rate=0.2) | |
146 | + | |
147 | + X,Y = dil2.load_data(mode='local',feattype='coef') | |
148 | + print X[0] | |
149 | + print Y | |
150 | + | |
139 | 151 | |
140 | 152 | if __name__ == '__main__': |
141 | 153 | # test_MSR() | ... | ... |