Commit ec755e37654e61074fe94b3fb10932bb14c0d856
1 parent
2fe06b7f
Exists in
master
and in
1 other branch
cropping.
Showing
2 changed files
with
74 additions
and
14 deletions
Show diff stats
mdata/ILSVRC.py
@@ -22,6 +22,7 @@ import numpy as np | @@ -22,6 +22,7 @@ import numpy as np | ||
22 | from numpy.random import randn | 22 | from numpy.random import randn |
23 | import pandas as pd | 23 | import pandas as pd |
24 | from scipy import stats | 24 | from scipy import stats |
25 | +import random | ||
25 | 26 | ||
26 | from subprocess import Popen, PIPE, STDOUT | 27 | from subprocess import Popen, PIPE, STDOUT |
27 | 28 | ||
@@ -268,6 +269,39 @@ class DataILSVRC(DataDumperBase): | @@ -268,6 +269,39 @@ class DataILSVRC(DataDumperBase): | ||
def embed(self, rate=None):
    """Run the embedding step by delegating to ``self._embed_inner``.

    :param rate: forwarded unchanged to ``_embed_inner``; defaults to ``None``.
    """
    self._embed_inner(rate)
270 | 271 | ||
272 | + | ||
def crop(self, size=(300, 300)):
    """Write one randomly positioned crop of every image under ``self.data_dir``.

    Walks ``self.data_dir`` recursively. Every file that opens as an image
    and is at least ``size`` large has a random ``size`` window cut out and
    saved to ``self.data_dir + '_crop_pil'`` under its original file name.
    Images smaller than ``size`` in either dimension are skipped. Files that
    fail to open, crop or save are reported on stdout and skipped, so one
    bad file does not abort the whole run.

    Output files are keyed by bare file name only, so files with the same
    name in different sub-directories overwrite one another.

    :param size: ``(width, height)`` of the crop window in pixels.
    """
    W, H = size  # loop-invariant, so unpack once instead of once per file
    out_dir = self.data_dir + '_crop_pil'

    # The output directory must exist before saving; otherwise every save
    # fails and the best-effort handler below hides the failure per file.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for path, subdirs, files in os.walk(self.data_dir):
        for name in files:
            image = os.path.join(path, name)
            print(image)

            try:
                im = Image.open(image)
                w, h = im.size
                if w < W or h < H:
                    # Too small to hold a full crop window.
                    continue
                # randint is inclusive on both ends, so the window always
                # stays inside the image.
                left = random.randint(0, w - W)
                upper = random.randint(0, h - H)
                im = im.crop((left, upper, left + W, upper + H))
                im.save(os.path.join(out_dir, name))
            except Exception as e:
                # Deliberately best-effort: report the file's error and
                # carry on with the next one.
                print('[EXCPT] %s' % e)
303 | + | ||
304 | + | ||
271 | def get_table(self): | 305 | def get_table(self): |
272 | if self.table != None: | 306 | if self.table != None: |
273 | return self.table | 307 | return self.table |
@@ -410,30 +444,44 @@ class DataILSVRC(DataDumperBase): | @@ -410,30 +444,44 @@ class DataILSVRC(DataDumperBase): | ||
410 | 444 | ||
411 | dict_dataset = {} | 445 | dict_dataset = {} |
412 | 446 | ||
413 | - with open(self.list_file, 'rb') as tsvfile: | ||
414 | - tsvfile = csv.reader(tsvfile, delimiter='\t') | ||
415 | - for line in tsvfile: | ||
416 | - hash = line[0] | ||
417 | - tag = line[-1] | ||
418 | - path_feat = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.' + feattype) | ||
419 | - if path_feat: | ||
420 | - with open(path_feat, 'rb') as featfile: | ||
421 | - dict_dataset[hash] = (tag, json.loads(featfile.read())) | 447 | + if feattype == 'coef': # raw |
448 | + with open(self.list_file, 'rb') as tsvfile: | ||
449 | + tsvfile = csv.reader(tsvfile, delimiter='\t') | ||
450 | + for line in tsvfile: | ||
451 | + hash = line[0] | ||
452 | + tag = line[-1] | ||
453 | + image = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.jpg') | ||
454 | + if image: | ||
455 | + im = Jpeg(image, key=sample_key) | ||
456 | + dict_dataset[hash] = (tag, im.getCoefBlocks('Y')) | ||
457 | + | ||
458 | + else: | ||
459 | + with open(self.list_file, 'rb') as tsvfile: | ||
460 | + tsvfile = csv.reader(tsvfile, delimiter='\t') | ||
461 | + for line in tsvfile: | ||
462 | + hash = line[0] | ||
463 | + tag = line[-1] | ||
464 | + path_feat = os.path.join(self.feat_dir, hash[:3], hash[3:] + '.' + feattype) | ||
465 | + if path_feat: | ||
466 | + with open(path_feat, 'rb') as featfile: | ||
467 | + dict_dataset[hash] = (tag, json.loads(featfile.read())) | ||
422 | 468 | ||
423 | for tag, feat in dict_dataset.values(): | 469 | for tag, feat in dict_dataset.values(): |
424 | - X.append([item for sublist in feat for subsublist in sublist for item in subsublist]) | 470 | + # X.append([item for sublist in feat for subsublist in sublist for item in subsublist]) |
471 | + X.append(np.array(feat).ravel().tolist()) | ||
425 | Y.append(int(tag)) | 472 | Y.append(int(tag)) |
426 | 473 | ||
427 | - elif mode == "remote" or mode == "hbase": | 474 | + elif mode == "hbase": # remote |
428 | if self.table == None: | 475 | if self.table == None: |
429 | self.table = self.get_table() | 476 | self.table = self.get_table() |
430 | 477 | ||
431 | col_feat, col_tag = 'cf_feat:' + feattype, 'cf_tag:' + tagtype | 478 | col_feat, col_tag = 'cf_feat:' + feattype, 'cf_tag:' + tagtype |
432 | for key, data in self.table.scan(columns=[col_feat, col_tag]): | 479 | for key, data in self.table.scan(columns=[col_feat, col_tag]): |
433 | - X.append([item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist]) | 480 | + X.append( |
481 | + [item for sublist in json.loads(data[col_feat]) for subsublist in sublist for item in subsublist]) | ||
434 | Y.append(int(data[col_tag])) | 482 | Y.append(int(data[col_tag])) |
435 | 483 | ||
436 | - elif mode == "spark" or mode == "cluster": | 484 | + elif mode == "spark": # cluster |
437 | if self.sparker == None: | 485 | if self.sparker == None: |
438 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077') | 486 | self.sparker = SC.Sparker(host='HPC-server', appname='ImageCV', master='spark://HPC-server:7077') |
439 | 487 |
test/test_data.py
@@ -135,7 +135,19 @@ def test_pipeline(): | @@ -135,7 +135,19 @@ def test_pipeline(): | ||
135 | 135 | ||
136 | 136 | ||
def test_crop():
    """Crop the 'Test_1' category, then load 'coef' features of the cropped set.

    Prints the first feature vector and the full label list returned by
    ``load_data`` for the 'Test_1_crop_pil' category.
    """
    source_set = ILSVRC.DataILSVRC(
        base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val',
        category='Test_1')
    source_set.crop()

    cropped_set = ILSVRC.DataILSVRC(
        base_dir='/data/hadoop/ImageNet/ILSVRC/ILSVRC2013_DET_val',
        category='Test_1_crop_pil')

    # NOTE(review): format/embed run on the un-cropped set while the data is
    # loaded from the cropped one -- confirm this is intended.
    source_set.format()
    source_set.embed(rate=0.2)

    features, labels = cropped_set.load_data(mode='local', feattype='coef')
    print(features[0])
    print(labels)
150 | + | ||
139 | 151 | ||
140 | if __name__ == '__main__': | 152 | if __name__ == '__main__': |
141 | # test_MSR() | 153 | # test_MSR() |